config/shared/platform/orangepi5plus/rk3588-v6.9-rc1.patch

From a171ae95d01a28fb196a4ac256e4913795e76c2b Mon Sep 17 00:00:00 2001
From: Christopher Obbard <chris.obbard@collabora.com>
Date: Mon, 20 Feb 2023 16:59:04 +0000
Subject: [PATCH 01/69] [NOUPSTREAM] Add GitLab CI support

Add CI support. This will do the following:

1. Run dt_binding_check to make sure no major flaws were introduced in
   the DT bindings
2. Run dtbs_check, for Rock 5A, Rock 5B and EVB1. If warnings are
   generated the CI will report that as warning
3. Build a Kernel .deb package
4. Generate a test job for LAVA and run it

Co-developed-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Co-developed-by: Sjoerd Simons <sjoerd@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Signed-off-by: Sjoerd Simons <sjoerd@collabora.com>
Signed-off-by: Christopher Obbard <chris.obbard@collabora.com>
---
 .gitignore        |   1 +
 .gitlab-ci.yml    | 142 ++++++++++++++++++++++++++++++++++++++++++++++
 lava/testjob.yaml |  73 ++++++++++++++++++++++++
 3 files changed, 216 insertions(+)

diff --git a/.gitignore b/.gitignore
index c59dc60ba62e..59aa4da11b89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -104,6 +104,7 @@ modules.order
 !.kunitconfig
 !.mailmap
 !.rustfmt.toml
+!.gitlab-ci.yml

 #
 # Generated include files
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 000000000000..5582b71f0c67
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,142 @@
+default:
+  image: debian:testing
+  tags:
+    - bookworm
+
+stages:
+  - build
+  - test
+  - generate
+  - lava
+
+check devicetrees:
+  stage: build
+  tags:
+    - ultra-heavyweight
+  variables:
+    DEBIAN_FRONTEND: noninteractive
+    GIT_SUBMODULE_STRATEGY: normal
+    ARCH: arm64
+    DEFCONFIG: defconfig
+    CROSS_COMPILE: aarch64-linux-gnu-
+    DUT: rockchip/rk3588s-rock-5a.dtb rockchip/rk3588-rock-5b.dtb rockchip/rk3588-evb1-v10.dtb
+  before_script:
+    - apt update
+    - apt install -y devscripts
+                     build-essential
+                     crossbuild-essential-arm64
+                     bc
+                     bison
+                     flex
+                     swig
+                     python3-dev
+                     python3-pip
+                     yamllint
+    - pip3 install --break-system-packages dtschema
+  script:
+    - make $DEFCONFIG
+    - make -j$(nproc) dt_binding_check
+    - make -j$(nproc) CHECK_DTBS=y $DUT 2> dt-warnings.txt
+    - if [[ $(cat dt-warnings.txt | wc -l) > 0 ]]; then cat dt-warnings.txt; exit 42; fi
+  allow_failure:
+    exit_codes:
+      - 42
+
+.build debian package:
+  stage: build
+  tags:
+    - ultra-heavyweight
+  cache:
+    when: on_success
+    key: $CI_COMMIT_REF_SLUG
+    paths:
+      - ccache
+  variables:
+    DEBIAN_FRONTEND: noninteractive
+    GIT_SUBMODULE_STRATEGY: normal
+    ARCH: amd64
+    DEFCONFIG: defconfig
+    CCACHE_BASEDIR: $CI_PROJECT_DIR
+    CCACHE_DIR: $CI_PROJECT_DIR/ccache
+  before_script:
+    - apt update
+    - apt install -y devscripts
+                     build-essential
+                     crossbuild-essential-arm64
+                     bc
+                     bison
+                     ccache
+                     flex
+                     rsync
+                     kmod
+                     cpio
+                     libelf-dev
+                     libssl-dev
+    # Setup ccache
+    - export PATH="/usr/lib/ccache:$PATH"
+    - ccache -s
+  script:
+    - make $DEFCONFIG
+    - ./scripts/config -e WLAN -e WLAN_VENDOR_BROADCOM -m BRCMUTIL -m BRCMFMAC
+                       -e BRCMFMAC_PROTO_BCDC -e BRCMFMAC_PROTO_MSGBUF
+                       -e BRCMFMAC_USB
+                       -e WLAN_VENDOR_REALTEK -m RTW89 -m RTW89_CORE
+                       -m RTW89_PCI -m RTW89_8825B -m RTW89_8852BE
+                       -m BINFMT_MISC
+    - make -j$(nproc) $ADDITIONAL_BUILD_CMD bindeb-pkg
+    - mkdir artifacts && dcmd mv ../*.changes artifacts/
+  artifacts:
+    paths:
+      - artifacts
+
+build arm64 debian package:
+  extends: .build debian package
+  variables:
+    ARCH: arm64
+    CROSS_COMPILE: aarch64-linux-gnu-
+    ADDITIONAL_BUILD_CMD: KBUILD_IMAGE=arch/arm64/boot/Image
+
+generate tests:
+  image: debian:bookworm-slim
+  stage: generate
+  tags:
+    - lightweight
+  variables:
+    GIT_STRATEGY: fetch
+    GIT_DEPTH: "1"
+  needs:
+    - "build arm64 debian package"
+  before_script:
+    - apt update
+    - apt install -y wget
+  script:
+    - mkdir deb
+    - "for x in artifacts/linux-image*.deb ; do dpkg -x ${x} deb ; done"
+    - cp deb/boot/vmlinuz* vmlinuz
+    - mkdir -p deb/lib/firmware/arm/mali/arch10.8
+    - wget -O deb/lib/firmware/arm/mali/arch10.8/mali_csffw.bin "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/plain/arm/mali/arch10.8/mali_csffw.bin"
+    - tar -f modules.tar.gz  -C deb  -c -z -v lib/modules lib/firmware
+    - mkdir dtbs
+    - cp -r deb/usr/lib/linux-image*/* dtbs
+    - sed -i s,%%KERNEL_BUILD_JOB%%,${CI_JOB_ID},g lava/testjob.yaml
+  artifacts:
+    paths:
+      - vmlinuz*
+      - modules.tar.gz
+      - dtbs
+      - lava/testjob.yaml
+
+lava test:
+  stage: lava
+  tags:
+    - lava-runner
+  script:
+    - submit lava/testjob.yaml
+  needs:
+    - "generate tests"
+  artifacts:
+    when: always
+    paths:
+      - "*"
+    reports:
+      junit: "*.xml"
diff --git a/lava/testjob.yaml b/lava/testjob.yaml
new file mode 100644
index 000000000000..8dfaf772296c
--- /dev/null
+++ b/lava/testjob.yaml
@@ -0,0 +1,73 @@
+device_type:  rk3588-rock-5b
+
+job_name: Hardware enablement tests {{job.CI_JOB_ID}}
+timeouts:
+  job:
+    minutes: 15
+  action:
+   minutes: 5
+priority: high
+visibility: public
+
+context:
+  extra_kernel_args: rootwait
+
+actions:
+  - deploy:
+      timeout:
+        minutes: 2
+      to: tftp
+      kernel:
+        url: "{{job.CI_PROJECT_URL}}/-/jobs/%%KERNEL_BUILD_JOB%%/artifacts/raw/vmlinuz"
+        type: image
+      modules:
+        url: "{{job.CI_PROJECT_URL}}/-/jobs/%%KERNEL_BUILD_JOB%%/artifacts/raw/modules.tar.gz"
+        compression: gz
+      dtb:
+        url: "{{job.CI_PROJECT_URL}}/-/jobs/%%KERNEL_BUILD_JOB%%/artifacts/raw/dtbs/rockchip/rk3588-rock-5b.dtb"
+      ramdisk:
+        url: https://gitlab.collabora.com/lava/health-check-images/-/jobs/artifacts/bookworm/raw/bookworm/bookworm-rootfs-arm64-initramfs.gz?job=build+bookworm+image:+%5Barm64,+rootfs%5D
+        compression: gz
+      nfsrootfs:
+        url: https://gitlab.collabora.com/lava/health-check-images/-/jobs/artifacts/bookworm/raw/bookworm/bookworm-rootfs-arm64.tar.gz?job=build+bookworm+image:+%5Barm64,+rootfs%5D
+        compression: gz
+
+  - boot:
+      method: u-boot
+      commands: nfs
+      timeout:
+        minutes: 10
+      auto_login:
+        login_prompt: 'login:'
+        username: user
+        password_prompt: 'Password:'
+        password: user
+        login_commands:
+          - sudo su
+          - env
+          - systemctl --failed
+      prompts:
+        - 'user@health(.*)$'
+        - 'root@health(.*)#'
+
+  - test:
+      timeout:
+        minutes: 1
+      definitions:
+      - repository:
+          metadata:
+            format: Lava-Test Test Definition 1.0
+            name: health
+            description: "health check"
+            os:
+              - apertis
+            scope:
+              - functional
+            environment:
+              - lava-test-shell
+          run:
+            steps:
+              - ip a s
+        from: inline
+        name: network
+        path: inline/health.yaml
--
2.42.0


From 57bc5336965240093c0ebdba5a4061e7e48c8a7b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:15 +0100
Subject: [PATCH 02/69] [MERGED] drm/panthor: Add uAPI

Panthor follows the lead of other recently submitted drivers with
ioctls allowing us to support modern Vulkan features, like sparse memory
binding:

- Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET),
  with the 'exclusive-VM' bit to speed-up BO reservation on job submission
- VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND
  ioctl is loosely based on the Xe model, and can handle both
  asynchronous and synchronous requests
- GPU execution context creation/destruction, tiler heap context creation
  and job submission. Those ioctls reflect how the hardware/scheduler
  works and are thus driver specific.

We also have a way to expose IO regions, such that the usermode driver
can directly access specific/well-isolate registers, like the
LATEST_FLUSH register used to implement cache-flush reduction.

This uAPI intentionally keeps usermode queues out of the scope, which
explains why doorbell registers and command stream ring-buffers are not
directly exposed to userspace.

v6:
- Add Maxime's and Heiko's acks

v5:
- Fix typo
- Add Liviu's R-b

v4:
- Add a VM_GET_STATE ioctl
- Fix doc
- Expose the CORE_FEATURES register so we can deal with variants in the
  UMD
- Add Steve's R-b

v3:
- Add the concept of sync-only VM operation
- Fix support for 32-bit userspace
- Rework drm_panthor_vm_create to pass the user VA size instead of
  the kernel VA size (suggested by Robin Murphy)
- Typo fixes
- Explicitly cast enums with top bit set to avoid compiler warnings in
  -pedantic mode.
- Drop property core_group_count as it can be easily calculated by the
  number of bits set in l2_present.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-2-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 Documentation/gpu/driver-uapi.rst |   5 +
 include/uapi/drm/panthor_drm.h    | 945 ++++++++++++++++++++++++++++++
 2 files changed, 950 insertions(+)

diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst
index e5070a0e95ab..971cdb4816fc 100644
--- a/Documentation/gpu/driver-uapi.rst
+++ b/Documentation/gpu/driver-uapi.rst
@@ -18,6 +18,11 @@ VM_BIND / EXEC uAPI

 .. kernel-doc:: include/uapi/drm/nouveau_drm.h

+drm/panthor uAPI
+================
+
+.. kernel-doc:: include/uapi/drm/panthor_drm.h
+
 drm/xe uAPI
 ===========

diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h
new file mode 100644
index 000000000000..373df80f41ed
--- /dev/null
+++ b/include/uapi/drm/panthor_drm.h
@@ -0,0 +1,945 @@
+/* SPDX-License-Identifier: MIT */
+/* Copyright (C) 2023 Collabora ltd. */
+#ifndef _PANTHOR_DRM_H_
+#define _PANTHOR_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/**
+ * DOC: Introduction
+ *
+ * This documentation describes the Panthor IOCTLs.
+ *
+ * Just a few generic rules about the data passed to the Panthor IOCTLs:
+ *
+ * - Structures must be aligned on 64-bit/8-byte. If the object is not
+ *   naturally aligned, a padding field must be added.
+ * - Fields must be explicitly aligned to their natural type alignment with
+ *   pad[0..N] fields.
+ * - All padding fields will be checked by the driver to make sure they are
+ *   zeroed.
+ * - Flags can be added, but not removed/replaced.
+ * - New fields can be added to the main structures (the structures
+ *   directly passed to the ioctl). Those fields can be added at the end of
+ *   the structure, or replace existing padding fields. Any new field being
+ *   added must preserve the behavior that existed before those fields were
+ *   added when a value of zero is passed.
+ * - New fields can be added to indirect objects (objects pointed by the
+ *   main structure), iff those objects are passed a size to reflect the
+ *   size known by the userspace driver (see drm_panthor_obj_array::stride
+ *   or drm_panthor_dev_query::size).
+ * - If the kernel driver is too old to know some fields, those will be
+ *   ignored if zero, and otherwise rejected (and so will be zero on output).
+ * - If userspace is too old to know some fields, those will be zeroed
+ *   (input) before the structure is parsed by the kernel driver.
+ * - Each new flag/field addition must come with a driver version update so
+ *   the userspace driver doesn't have to trial and error to know which
+ *   flags are supported.
+ * - Structures should not contain unions, as this would defeat the
+ *   extensibility of such structures.
+ * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed
+ *   at the end of the drm_panthor_ioctl_id enum.
+ */
+
+/**
+ * DOC: MMIO regions exposed to userspace.
+ *
+ * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET
+ *
+ * File offset for all MMIO regions being exposed to userspace. Don't use
+ * this value directly, use DRM_PANTHOR_USER_<name>_OFFSET values instead.
+ * pgoffset passed to mmap2() is an unsigned long, which forces us to use a
+ * different offset on 32-bit and 64-bit systems.
+ *
+ * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET
+ *
+ * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls
+ * GPU cache flushing through CS instructions, but the flush reduction
+ * mechanism requires a flush_id. This flush_id could be queried with an
+ * ioctl, but Arm provides a well-isolated register page containing only this
+ * read-only register, so let's expose this page through a static mmap offset
+ * and allow direct mapping of this MMIO region so we can avoid the
+ * user <-> kernel round-trip.
+ */
+#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT	(1ull << 43)
+#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT	(1ull << 56)
+#define DRM_PANTHOR_USER_MMIO_OFFSET		(sizeof(unsigned long) < 8 ? \
+						 DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \
+						 DRM_PANTHOR_USER_MMIO_OFFSET_64BIT)
+#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET	(DRM_PANTHOR_USER_MMIO_OFFSET | 0)
+
+/**
+ * DOC: IOCTL IDs
+ *
+ * enum drm_panthor_ioctl_id - IOCTL IDs
+ *
+ * Place new ioctls at the end, don't re-order, don't replace or remove entries.
+ *
+ * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx
+ * definitions instead.
+ */
+enum drm_panthor_ioctl_id {
+	/** @DRM_PANTHOR_DEV_QUERY: Query device information. */
+	DRM_PANTHOR_DEV_QUERY = 0,
+
+	/** @DRM_PANTHOR_VM_CREATE: Create a VM. */
+	DRM_PANTHOR_VM_CREATE,
+
+	/** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */
+	DRM_PANTHOR_VM_DESTROY,
+
+	/** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */
+	DRM_PANTHOR_VM_BIND,
+
+	/** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */
+	DRM_PANTHOR_VM_GET_STATE,
+
+	/** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */
+	DRM_PANTHOR_BO_CREATE,
+
+	/**
+	 * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to
+	 * mmap to map a GEM object.
+	 */
+	DRM_PANTHOR_BO_MMAP_OFFSET,
+
+	/** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */
+	DRM_PANTHOR_GROUP_CREATE,
+
+	/** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */
+	DRM_PANTHOR_GROUP_DESTROY,
+
+	/**
+	 * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging
+	 * to a specific scheduling group.
+	 */
+	DRM_PANTHOR_GROUP_SUBMIT,
+
+	/** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */
+	DRM_PANTHOR_GROUP_GET_STATE,
+
+	/** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */
+	DRM_PANTHOR_TILER_HEAP_CREATE,
+
+	/** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */
+	DRM_PANTHOR_TILER_HEAP_DESTROY,
+};
+
+/**
+ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number
+ * @__access: Access type. Must be R, W or RW.
+ * @__id: One of the DRM_PANTHOR_xxx id.
+ * @__type: Suffix of the type being passed to the IOCTL.
+ *
+ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx
+ * values instead.
+ *
+ * Return: An IOCTL number to be passed to ioctl() from userspace.
+ */
+#define DRM_IOCTL_PANTHOR(__access, __id, __type) \
+	DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \
+			   struct drm_panthor_ ## __type)
+
+#define DRM_IOCTL_PANTHOR_DEV_QUERY \
+	DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query)
+#define DRM_IOCTL_PANTHOR_VM_CREATE \
+	DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create)
+#define DRM_IOCTL_PANTHOR_VM_DESTROY \
+	DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy)
+#define DRM_IOCTL_PANTHOR_VM_BIND \
+	DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind)
+#define DRM_IOCTL_PANTHOR_VM_GET_STATE \
+	DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state)
+#define DRM_IOCTL_PANTHOR_BO_CREATE \
+	DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create)
+#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \
+	DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset)
+#define DRM_IOCTL_PANTHOR_GROUP_CREATE \
+	DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create)
+#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \
+	DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy)
+#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \
+	DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit)
+#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \
+	DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state)
+#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \
+	DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create)
+#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \
+	DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy)
+
+/**
+ * DOC: IOCTL arguments
+ */
+
+/**
+ * struct drm_panthor_obj_array - Object array.
+ *
+ * This object is used to pass an array of objects whose size is subject to changes in
+ * future versions of the driver. In order to support this mutability, we pass a stride
+ * describing the size of the object as known by userspace.
+ *
+ * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use
+ * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to
+ * the object size.
+ */
+struct drm_panthor_obj_array {
+	/** @stride: Stride of object struct. Used for versioning. */
+	__u32 stride;
+
+	/** @count: Number of objects in the array. */
+	__u32 count;
+
+	/** @array: User pointer to an array of objects. */
+	__u64 array;
+};
+
+/**
+ * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field.
+ * @cnt: Number of elements in the array.
+ * @ptr: Pointer to the array to pass to the kernel.
+ *
+ * Macro initializing a drm_panthor_obj_array based on the object size as known
+ * by userspace.
+ */
+#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \
+	{ .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) }
+
+/**
+ * enum drm_panthor_sync_op_flags - Synchronization operation flags.
+ */
+enum drm_panthor_sync_op_flags {
+	/** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */
+	DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff,
+
+	/** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */
+	DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0,
+
+	/**
+	 * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization
+	 * object type.
+	 */
+	DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1,
+
+	/** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */
+	DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31,
+
+	/** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */
+	DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31),
+};
+
+/**
+ * struct drm_panthor_sync_op - Synchronization operation.
+ */
+struct drm_panthor_sync_op {
+	/** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */
+	__u32 flags;
+
+	/** @handle: Sync handle. */
+	__u32 handle;
+
+	/**
+	 * @timeline_value: MBZ if
+	 * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) !=
+	 * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ.
+	 */
+	__u64 timeline_value;
+};
+
+/**
+ * enum drm_panthor_dev_query_type - Query type
+ *
+ * Place new types at the end, don't re-order, don't remove or replace.
+ */
+enum drm_panthor_dev_query_type {
+	/** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */
+	DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0,
+
+	/** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */
+	DRM_PANTHOR_DEV_QUERY_CSIF_INFO,
+};
+
+/**
+ * struct drm_panthor_gpu_info - GPU information
+ *
+ * Structure grouping all queryable information relating to the GPU.
+ */
+struct drm_panthor_gpu_info {
+	/** @gpu_id : GPU ID. */
+	__u32 gpu_id;
+#define DRM_PANTHOR_ARCH_MAJOR(x)		((x) >> 28)
+#define DRM_PANTHOR_ARCH_MINOR(x)		(((x) >> 24) & 0xf)
+#define DRM_PANTHOR_ARCH_REV(x)			(((x) >> 20) & 0xf)
+#define DRM_PANTHOR_PRODUCT_MAJOR(x)		(((x) >> 16) & 0xf)
+#define DRM_PANTHOR_VERSION_MAJOR(x)		(((x) >> 12) & 0xf)
+#define DRM_PANTHOR_VERSION_MINOR(x)		(((x) >> 4) & 0xff)
+#define DRM_PANTHOR_VERSION_STATUS(x)		((x) & 0xf)
+
+	/** @gpu_rev: GPU revision. */
+	__u32 gpu_rev;
+
+	/** @csf_id: Command stream frontend ID. */
+	__u32 csf_id;
+#define DRM_PANTHOR_CSHW_MAJOR(x)		(((x) >> 26) & 0x3f)
+#define DRM_PANTHOR_CSHW_MINOR(x)		(((x) >> 20) & 0x3f)
+#define DRM_PANTHOR_CSHW_REV(x)			(((x) >> 16) & 0xf)
+#define DRM_PANTHOR_MCU_MAJOR(x)		(((x) >> 10) & 0x3f)
+#define DRM_PANTHOR_MCU_MINOR(x)		(((x) >> 4) & 0x3f)
+#define DRM_PANTHOR_MCU_REV(x)			((x) & 0xf)
+
+	/** @l2_features: L2-cache features. */
+	__u32 l2_features;
+
+	/** @tiler_features: Tiler features. */
+	__u32 tiler_features;
+
+	/** @mem_features: Memory features. */
+	__u32 mem_features;
+
+	/** @mmu_features: MMU features. */
+	__u32 mmu_features;
+#define DRM_PANTHOR_MMU_VA_BITS(x)		((x) & 0xff)
+
+	/** @thread_features: Thread features. */
+	__u32 thread_features;
+
+	/** @max_threads: Maximum number of threads. */
+	__u32 max_threads;
+
+	/** @thread_max_workgroup_size: Maximum workgroup size. */
+	__u32 thread_max_workgroup_size;
+
+	/**
+	 * @thread_max_barrier_size: Maximum number of threads that can wait
+	 * simultaneously on a barrier.
+	 */
+	__u32 thread_max_barrier_size;
+
+	/** @coherency_features: Coherency features. */
+	__u32 coherency_features;
+
+	/** @texture_features: Texture features. */
+	__u32 texture_features[4];
+
+	/** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */
+	__u32 as_present;
+
+	/** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */
+	__u64 shader_present;
+
+	/** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */
+	__u64 l2_present;
+
+	/** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */
+	__u64 tiler_present;
+
+	/* @core_features: Used to discriminate core variants when they exist. */
+	__u32 core_features;
+
+	/* @pad: MBZ. */
+	__u32 pad;
+};
+
+/**
+ * struct drm_panthor_csif_info - Command stream interface information
+ *
+ * Structure grouping all queryable information relating to the command stream interface.
+ */
+struct drm_panthor_csif_info {
+	/** @csg_slot_count: Number of command stream group slots exposed by the firmware. */
+	__u32 csg_slot_count;
+
+	/** @cs_slot_count: Number of command stream slots per group. */
+	__u32 cs_slot_count;
+
+	/** @cs_reg_count: Number of command stream registers. */
+	__u32 cs_reg_count;
+
+	/** @scoreboard_slot_count: Number of scoreboard slots. */
+	__u32 scoreboard_slot_count;
+
+	/**
+	 * @unpreserved_cs_reg_count: Number of command stream registers reserved by
+	 * the kernel driver to call a userspace command stream.
+	 *
+	 * All registers can be used by a userspace command stream, but the
+	 * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are
+	 * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called.
+	 */
+	__u32 unpreserved_cs_reg_count;
+
+	/**
+	 * @pad: Padding field, set to zero.
+	 */
+	__u32 pad;
+};
+
+/**
+ * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY
+ */
+struct drm_panthor_dev_query {
+	/** @type: the query type (see drm_panthor_dev_query_type). */
+	__u32 type;
+
+	/**
+	 * @size: size of the type being queried.
+	 *
+	 * If pointer is NULL, size is updated by the driver to provide the
+	 * output structure size. If pointer is not NULL, the driver will
+	 * only copy min(size, actual_structure_size) bytes to the pointer,
+	 * and update the size accordingly. This allows us to extend query
+	 * types without breaking userspace.
+	 */
+	__u32 size;
+
+	/**
+	 * @pointer: user pointer to a query type struct.
+	 *
+	 * Pointer can be NULL, in which case, nothing is copied, but the
+	 * actual structure size is returned. If not NULL, it must point to
+	 * a location that's large enough to hold size bytes.
+	 */
+	__u64 pointer;
+};
+
+/**
+ * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE
+ */
+struct drm_panthor_vm_create {
+	/** @flags: VM flags, MBZ. */
+	__u32 flags;
+
+	/** @id: Returned VM ID. */
+	__u32 id;
+
+	/**
+	 * @user_va_range: Size of the VA space reserved for user objects.
+	 *
+	 * The kernel will pick the remaining space to map kernel-only objects to the
+	 * VM (heap chunks, heap context, ring buffers, kernel synchronization objects,
+	 * ...). If the space left for kernel objects is too small, kernel object
+	 * allocation will fail further down the road. One can use
+	 * drm_panthor_gpu_info::mmu_features to extract the total virtual address
+	 * range, and chose a user_va_range that leaves some space to the kernel.
+	 *
+	 * If user_va_range is zero, the kernel will pick a sensible value based on
+	 * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user
+	 * split should leave enough VA space for userspace processes to support SVM,
+	 * while still allowing the kernel to map some amount of kernel objects in
+	 * the kernel VA range). The value chosen by the driver will be returned in
+	 * @user_va_range.
+	 *
+	 * User VA space always starts at 0x0, kernel VA space is always placed after
+	 * the user VA range.
+	 */
+	__u64 user_va_range;
+};
+
+/**
+ * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY
+ */
+struct drm_panthor_vm_destroy {
+	/** @id: ID of the VM to destroy. */
+	__u32 id;
+
+	/** @pad: MBZ. */
+	__u32 pad;
+};
+
+/**
+ * enum drm_panthor_vm_bind_op_flags - VM bind operation flags
+ */
+enum drm_panthor_vm_bind_op_flags {
+	/**
+	 * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only.
+	 *
+	 * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
+	 */
+	DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0,
+
+	/**
+	 * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable.
+	 *
+	 * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
+	 */
+	DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1,
+
+	/**
+	 * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached.
+	 *
+	 * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP.
+	 */
+	DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2,
+
+	/**
+	 * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation.
+	 */
+	DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28),
+
+	/** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */
+	DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28,
+
+	/** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */
+	DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28,
+
+	/**
+	 * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation.
+	 *
+	 * Just serves as a synchronization point on a VM queue.
+	 *
+	 * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags,
+	 * and drm_panthor_vm_bind_op::syncs contains at least one element.
+	 */
+	DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28,
+};
+
+/**
+ * struct drm_panthor_vm_bind_op - VM bind operation
+ */
+struct drm_panthor_vm_bind_op {
+	/** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */
+	__u32 flags;
+
+	/**
+	 * @bo_handle: Handle of the buffer object to map.
+	 * MBZ for unmap or sync-only operations.
+	 */
+	__u32 bo_handle;
+
+	/**
+	 * @bo_offset: Buffer object offset.
+	 * MBZ for unmap or sync-only operations.
+	 */
+	__u64 bo_offset;
+
+	/**
+	 * @va: Virtual address to map/unmap.
+	 * MBZ for sync-only operations.
+	 */
+	__u64 va;
+
+	/**
+	 * @size: Size to map/unmap.
+	 * MBZ for sync-only operations.
+	 */
+	__u64 size;
+
+	/**
+	 * @syncs: Array of struct drm_panthor_sync_op synchronization
+	 * operations.
+	 *
+	 * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on
+	 * the drm_panthor_vm_bind object containing this VM bind operation.
+	 *
+	 * This array shall not be empty for sync-only operations.
+	 */
+	struct drm_panthor_obj_array syncs;
+
+};
+
+/**
+ * enum drm_panthor_vm_bind_flags - VM bind flags
+ */
+enum drm_panthor_vm_bind_flags {
+	/**
+	 * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM
+	 * queue instead of being executed synchronously.
+	 */
+	DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0,
+};
+
+/**
+ * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND
+ */
+struct drm_panthor_vm_bind {
+	/** @vm_id: VM targeted by the bind request. */
+	__u32 vm_id;
+
+	/** @flags: Combination of drm_panthor_vm_bind_flags flags. */
+	__u32 flags;
+
+	/** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */
+	struct drm_panthor_obj_array ops;
+};
+
+/**
+ * enum drm_panthor_vm_state - VM states.
+ */
+enum drm_panthor_vm_state {
+	/**
+	 * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable.
+	 *
+	 * New VM operations will be accepted on this VM.
+	 */
+	DRM_PANTHOR_VM_STATE_USABLE,
+
+	/**
+	 * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable.
+	 *
+	 * Something put the VM in an unusable state (like an asynchronous
+	 * VM_BIND request failing for any reason).
+	 *
+	 * Once the VM is in this state, all new MAP operations will be
+	 * rejected, and any GPU job targeting this VM will fail.
+	 * UNMAP operations are still accepted.
+	 *
+	 * The only way to recover from an unusable VM is to create a new
+	 * VM, and destroy the old one.
+	 */
+	DRM_PANTHOR_VM_STATE_UNUSABLE,
+};
+
+/**
+ * struct drm_panthor_vm_get_state - Get VM state.
+ */
+struct drm_panthor_vm_get_state {
+	/** @vm_id: VM targeted by the get_state request. */
+	__u32 vm_id;
+
+	/**
+	 * @state: state returned by the driver.
+	 *
+	 * Must be one of the enum drm_panthor_vm_state values.
+	 */
+	__u32 state;
+};
+
+/**
+ * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time.
+ */
+enum drm_panthor_bo_flags {
+	/** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */
+	DRM_PANTHOR_BO_NO_MMAP = (1 << 0),
+};
+
+/**
+ * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE.
+ */
+struct drm_panthor_bo_create {
+	/**
+	 * @size: Requested size for the object
+	 *
+	 * The (page-aligned) allocated size for the object will be returned.
+	 */
+	__u64 size;
+
+	/**
+	 * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags.
+	 */
+	__u32 flags;
+
+	/**
+	 * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to.
+	 *
+	 * If not zero, the field must refer to a valid VM ID, and implies that:
+	 *  - the buffer object will only ever be bound to that VM
+	 *  - cannot be exported as a PRIME fd
+	 */
+	__u32 exclusive_vm_id;
+
+	/**
+	 * @handle: Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+
+	/** @pad: MBZ. */
+	__u32 pad;
+};
+
+/**
+ * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET.
+ */
+struct drm_panthor_bo_mmap_offset {
+	/** @handle: Handle of the object we want an mmap offset for. */
+	__u32 handle;
+
+	/** @pad: MBZ. */
+	__u32 pad;
+
+	/** @offset: The fake offset to use for subsequent mmap calls. */
+	__u64 offset;
+};
+
+/**
+ * struct drm_panthor_queue_create - Queue creation arguments.
+ */
+struct drm_panthor_queue_create {
+	/**
+	 * @priority: Defines the priority of queues inside a group. Goes from 0 to 15,
+	 * 15 being the highest priority.
+	 */
+	__u8 priority;
+
+	/** @pad: Padding fields, MBZ. */
+	__u8 pad[3];
+
+	/** @ringbuf_size: Size of the ring buffer to allocate to this queue. */
+	__u32 ringbuf_size;
+};
+
+/**
+ * enum drm_panthor_group_priority - Scheduling group priority
+ */
+enum drm_panthor_group_priority {
+	/** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */
+	PANTHOR_GROUP_PRIORITY_LOW = 0,
+
+	/** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */
+	PANTHOR_GROUP_PRIORITY_MEDIUM,
+
+	/** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */
+	PANTHOR_GROUP_PRIORITY_HIGH,
+};
+
+/**
+ * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE
+ */
+struct drm_panthor_group_create {
+	/** @queues: Array of drm_panthor_queue_create elements. */
+	struct drm_panthor_obj_array queues;
+
+	/**
+	 * @max_compute_cores: Maximum number of cores that can be used by compute
+	 * jobs across CS queues bound to this group.
+	 *
+	 * Must be less or equal to the number of bits set in @compute_core_mask.
+	 */
+	__u8 max_compute_cores;
+
+	/**
+	 * @max_fragment_cores: Maximum number of cores that can be used by fragment
+	 * jobs across CS queues bound to this group.
+	 *
+	 * Must be less or equal to the number of bits set in @fragment_core_mask.
+	 */
+	__u8 max_fragment_cores;
+
+	/**
+	 * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs
+	 * across CS queues bound to this group.
+	 *
+	 * Must be less or equal to the number of bits set in @tiler_core_mask.
+	 */
+	__u8 max_tiler_cores;
+
+	/** @priority: Group priority (see enum drm_panthor_group_priority). */
+	__u8 priority;
+
+	/** @pad: Padding field, MBZ. */
+	__u32 pad;
+
+	/**
+	 * @compute_core_mask: Mask encoding cores that can be used for compute jobs.
+	 *
+	 * This field must have at least @max_compute_cores bits set.
+	 *
+	 * The bits set here should also be set in drm_panthor_gpu_info::shader_present.
+	 */
+	__u64 compute_core_mask;
+
+	/**
+	 * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs.
+	 *
+	 * This field must have at least @max_fragment_cores bits set.
+	 *
+	 * The bits set here should also be set in drm_panthor_gpu_info::shader_present.
+	 */
+	__u64 fragment_core_mask;
+
+	/**
+	 * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs.
+	 *
+	 * This field must have at least @max_tiler_cores bits set.
+	 *
+	 * The bits set here should also be set in drm_panthor_gpu_info::tiler_present.
+	 */
+	__u64 tiler_core_mask;
+
+	/**
+	 * @vm_id: VM ID to bind this group to.
+	 *
+	 * All submission to queues bound to this group will use this VM.
+	 */
+	__u32 vm_id;
+
+	/**
+	 * @group_handle: Returned group handle. Passed back when submitting jobs or
+	 * destroying a group.
+	 */
+	__u32 group_handle;
+};
+
+/**
+ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY
+ */
+struct drm_panthor_group_destroy {
+	/** @group_handle: Group to destroy */
+	__u32 group_handle;
+
+	/** @pad: Padding field, MBZ. */
+	__u32 pad;
+};
+
+/**
+ * struct drm_panthor_queue_submit - Job submission arguments.
+ *
+ * This is describing the userspace command stream to call from the kernel
+ * command stream ring-buffer. Queue submission is always part of a group
+ * submission, taking one or more jobs to submit to the underlying queues.
+ */
+struct drm_panthor_queue_submit {
+	/** @queue_index: Index of the queue inside a group. */
+	__u32 queue_index;
+
+	/**
+	 * @stream_size: Size of the command stream to execute.
+	 *
+	 * Must be 64-bit/8-byte aligned (the size of a CS instruction)
+	 *
+	 * Can be zero if stream_addr is zero too.
+	 */
+	__u32 stream_size;
+
+	/**
+	 * @stream_addr: GPU address of the command stream to execute.
+	 *
+	 * Must be aligned on 64-byte.
+	 *
+	 * Can be zero is stream_size is zero too.
+	 */
+	__u64 stream_addr;
+
+	/**
+	 * @latest_flush: FLUSH_ID read at the time the stream was built.
+	 *
+	 * This allows cache flush elimination for the automatic
+	 * flush+invalidate(all) done at submission time, which is needed to
+	 * ensure the GPU doesn't get garbage when reading the indirect command
+	 * stream buffers. If you want the cache flush to happen
+	 * unconditionally, pass a zero here.
+	 */
+	__u32 latest_flush;
+
+	/** @pad: MBZ. */
+	__u32 pad;
+
+	/** @syncs: Array of struct drm_panthor_sync_op sync operations. */
+	struct drm_panthor_obj_array syncs;
+};
+
+/**
+ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT
+ */
+struct drm_panthor_group_submit {
+	/** @group_handle: Handle of the group to queue jobs to. */
+	__u32 group_handle;
+
+	/** @pad: MBZ. */
+	__u32 pad;
+
+	/** @queue_submits: Array of drm_panthor_queue_submit objects. */
+	struct drm_panthor_obj_array queue_submits;
+};
+
+/**
+ * enum drm_panthor_group_state_flags - Group state flags
+ */
+enum drm_panthor_group_state_flags {
+	/**
+	 * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs.
+	 *
+	 * When a group ends up with this flag set, no jobs can be submitted to its queues.
+	 */
+	DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0,
+
+	/**
+	 * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults.
+	 *
+	 * When a group ends up with this flag set, no jobs can be submitted to its queues.
+	 */
+	DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1,
+};
+
+/**
+ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE
+ *
+ * Used to query the state of a group and decide whether a new group should be created to
+ * replace it.
+ */
+struct drm_panthor_group_get_state {
+	/** @group_handle: Handle of the group to query state on */
+	__u32 group_handle;
+
+	/**
+	 * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the
+	 * group state.
+	 */
+	__u32 state;
+
+	/** @fatal_queues: Bitmask of queues that faced fatal faults. */
+	__u32 fatal_queues;
+
+	/** @pad: MBZ */
+	__u32 pad;
+};
+
+/**
+ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE
+ */
+struct drm_panthor_tiler_heap_create {
+	/** @vm_id: VM ID the tiler heap should be mapped to */
+	__u32 vm_id;
+
+	/** @initial_chunk_count: Initial number of chunks to allocate. */
+	__u32 initial_chunk_count;
+
+	/** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */
+	__u32 chunk_size;
+
+	/** @max_chunks: Maximum number of chunks that can be allocated. */
+	__u32 max_chunks;
+
+	/**
+	 * @target_in_flight: Maximum number of in-flight render passes.
+	 *
+	 * If the heap has more than tiler jobs in-flight, the FW will wait for render
+	 * passes to finish before queuing new tiler jobs.
+	 */
+	__u32 target_in_flight;
+
+	/** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */
+	__u32 handle;
+
+	/** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */
+	__u64 tiler_heap_ctx_gpu_va;
+
+	/**
+	 * @first_heap_chunk_gpu_va: First heap chunk.
+	 *
+	 * The tiler heap is formed of heap chunks forming a single-link list. This
+	 * is the first element in the list.
+	 */
+	__u64 first_heap_chunk_gpu_va;
+};
+
+/**
+ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY
+ */
+struct drm_panthor_tiler_heap_destroy {
+	/** @handle: Handle of the tiler heap to destroy */
+	__u32 handle;
+
+	/** @pad: Padding field, MBZ. */
+	__u32 pad;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _PANTHOR_DRM_H_ */
--
2.42.0


From 3176b3a9cd01486f8649e4bc8f393b83a58335c1 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:16 +0100
Subject: [PATCH 03/69] [MERGED] drm/panthor: Add GPU register definitions

Those are the registers directly accessible through the MMIO range.

FW registers are exposed in panthor_fw.h.

v6:
- Add Maxime's and Heiko's acks

v4:
- Add the CORE_FEATURES register (needed for GPU variants)
- Add Steve's R-b

v3:
- Add macros to extract GPU ID info
- Formatting changes
- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF
- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT
- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-3-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++
 1 file changed, 239 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h
new file mode 100644
index 000000000000..b7b3b3add166
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+/*
+ * Register definitions based on mali_kbase_gpu_regmap.h and
+ * mali_kbase_gpu_regmap_csf.h
+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ */
+#ifndef __PANTHOR_REGS_H__
+#define __PANTHOR_REGS_H__
+
+#define GPU_ID						0x0
+#define   GPU_ARCH_MAJOR(x)				((x) >> 28)
+#define   GPU_ARCH_MINOR(x)				(((x) & GENMASK(27, 24)) >> 24)
+#define   GPU_ARCH_REV(x)				(((x) & GENMASK(23, 20)) >> 20)
+#define   GPU_PROD_MAJOR(x)				(((x) & GENMASK(19, 16)) >> 16)
+#define   GPU_VER_MAJOR(x)				(((x) & GENMASK(15, 12)) >> 12)
+#define   GPU_VER_MINOR(x)				(((x) & GENMASK(11, 4)) >> 4)
+#define   GPU_VER_STATUS(x)				((x) & GENMASK(3, 0))
+
+#define GPU_L2_FEATURES					0x4
+#define  GPU_L2_FEATURES_LINE_SIZE(x)			(1 << ((x) & GENMASK(7, 0)))
+
+#define GPU_CORE_FEATURES				0x8
+
+#define GPU_TILER_FEATURES				0xC
+#define GPU_MEM_FEATURES				0x10
+#define   GROUPS_L2_COHERENT				BIT(0)
+
+#define GPU_MMU_FEATURES				0x14
+#define  GPU_MMU_FEATURES_VA_BITS(x)			((x) & GENMASK(7, 0))
+#define  GPU_MMU_FEATURES_PA_BITS(x)			(((x) >> 8) & GENMASK(7, 0))
+#define GPU_AS_PRESENT					0x18
+#define GPU_CSF_ID					0x1C
+
+#define GPU_INT_RAWSTAT					0x20
+#define GPU_INT_CLEAR					0x24
+#define GPU_INT_MASK					0x28
+#define GPU_INT_STAT					0x2c
+#define   GPU_IRQ_FAULT					BIT(0)
+#define   GPU_IRQ_PROTM_FAULT				BIT(1)
+#define   GPU_IRQ_RESET_COMPLETED			BIT(8)
+#define   GPU_IRQ_POWER_CHANGED				BIT(9)
+#define   GPU_IRQ_POWER_CHANGED_ALL			BIT(10)
+#define   GPU_IRQ_CLEAN_CACHES_COMPLETED		BIT(17)
+#define   GPU_IRQ_DOORBELL_MIRROR			BIT(18)
+#define   GPU_IRQ_MCU_STATUS_CHANGED			BIT(19)
+#define GPU_CMD						0x30
+#define   GPU_CMD_DEF(type, payload)			((type) | ((payload) << 8))
+#define   GPU_SOFT_RESET				GPU_CMD_DEF(1, 1)
+#define   GPU_HARD_RESET				GPU_CMD_DEF(1, 2)
+#define   CACHE_CLEAN					BIT(0)
+#define   CACHE_INV					BIT(1)
+#define   GPU_FLUSH_CACHES(l2, lsc, oth)		\
+	  GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8))
+
+#define GPU_STATUS					0x34
+#define   GPU_STATUS_ACTIVE				BIT(0)
+#define   GPU_STATUS_PWR_ACTIVE				BIT(1)
+#define   GPU_STATUS_PAGE_FAULT				BIT(4)
+#define   GPU_STATUS_PROTM_ACTIVE			BIT(7)
+#define   GPU_STATUS_DBG_ENABLED			BIT(8)
+
+#define GPU_FAULT_STATUS				0x3C
+#define GPU_FAULT_ADDR_LO				0x40
+#define GPU_FAULT_ADDR_HI				0x44
+
+#define GPU_PWR_KEY					0x50
+#define  GPU_PWR_KEY_UNLOCK				0x2968A819
+#define GPU_PWR_OVERRIDE0				0x54
+#define GPU_PWR_OVERRIDE1				0x58
+
+#define GPU_TIMESTAMP_OFFSET_LO				0x88
+#define GPU_TIMESTAMP_OFFSET_HI				0x8C
+#define GPU_CYCLE_COUNT_LO				0x90
+#define GPU_CYCLE_COUNT_HI				0x94
+#define GPU_TIMESTAMP_LO				0x98
+#define GPU_TIMESTAMP_HI				0x9C
+
+#define GPU_THREAD_MAX_THREADS				0xA0
+#define GPU_THREAD_MAX_WORKGROUP_SIZE			0xA4
+#define GPU_THREAD_MAX_BARRIER_SIZE			0xA8
+#define GPU_THREAD_FEATURES				0xAC
+
+#define GPU_TEXTURE_FEATURES(n)				(0xB0 + ((n) * 4))
+
+#define GPU_SHADER_PRESENT_LO				0x100
+#define GPU_SHADER_PRESENT_HI				0x104
+#define GPU_TILER_PRESENT_LO				0x110
+#define GPU_TILER_PRESENT_HI				0x114
+#define GPU_L2_PRESENT_LO				0x120
+#define GPU_L2_PRESENT_HI				0x124
+
+#define SHADER_READY_LO					0x140
+#define SHADER_READY_HI					0x144
+#define TILER_READY_LO					0x150
+#define TILER_READY_HI					0x154
+#define L2_READY_LO					0x160
+#define L2_READY_HI					0x164
+
+#define SHADER_PWRON_LO					0x180
+#define SHADER_PWRON_HI					0x184
+#define TILER_PWRON_LO					0x190
+#define TILER_PWRON_HI					0x194
+#define L2_PWRON_LO					0x1A0
+#define L2_PWRON_HI					0x1A4
+
+#define SHADER_PWROFF_LO				0x1C0
+#define SHADER_PWROFF_HI				0x1C4
+#define TILER_PWROFF_LO					0x1D0
+#define TILER_PWROFF_HI					0x1D4
+#define L2_PWROFF_LO					0x1E0
+#define L2_PWROFF_HI					0x1E4
+
+#define SHADER_PWRTRANS_LO				0x200
+#define SHADER_PWRTRANS_HI				0x204
+#define TILER_PWRTRANS_LO				0x210
+#define TILER_PWRTRANS_HI				0x214
+#define L2_PWRTRANS_LO					0x220
+#define L2_PWRTRANS_HI					0x224
+
+#define SHADER_PWRACTIVE_LO				0x240
+#define SHADER_PWRACTIVE_HI				0x244
+#define TILER_PWRACTIVE_LO				0x250
+#define TILER_PWRACTIVE_HI				0x254
+#define L2_PWRACTIVE_LO					0x260
+#define L2_PWRACTIVE_HI					0x264
+
+#define GPU_REVID					0x280
+
+#define GPU_COHERENCY_FEATURES				0x300
+#define GPU_COHERENCY_PROT_BIT(name)			BIT(GPU_COHERENCY_  ## name)
+
+#define GPU_COHERENCY_PROTOCOL				0x304
+#define   GPU_COHERENCY_ACE				0
+#define   GPU_COHERENCY_ACE_LITE			1
+#define   GPU_COHERENCY_NONE				31
+
+#define MCU_CONTROL					0x700
+#define MCU_CONTROL_ENABLE				1
+#define MCU_CONTROL_AUTO				2
+#define MCU_CONTROL_DISABLE				0
+
+#define MCU_STATUS					0x704
+#define MCU_STATUS_DISABLED				0
+#define MCU_STATUS_ENABLED				1
+#define MCU_STATUS_HALT					2
+#define MCU_STATUS_FATAL				3
+
+/* Job Control regs */
+#define JOB_INT_RAWSTAT					0x1000
+#define JOB_INT_CLEAR					0x1004
+#define JOB_INT_MASK					0x1008
+#define JOB_INT_STAT					0x100c
+#define   JOB_INT_GLOBAL_IF				BIT(31)
+#define   JOB_INT_CSG_IF(x)				BIT(x)
+
+/* MMU regs */
+#define MMU_INT_RAWSTAT					0x2000
+#define MMU_INT_CLEAR					0x2004
+#define MMU_INT_MASK					0x2008
+#define MMU_INT_STAT					0x200c
+
+/* AS_COMMAND register commands */
+
+#define MMU_BASE					0x2400
+#define MMU_AS_SHIFT					6
+#define MMU_AS(as)					(MMU_BASE + ((as) << MMU_AS_SHIFT))
+
+#define AS_TRANSTAB_LO(as)				(MMU_AS(as) + 0x0)
+#define AS_TRANSTAB_HI(as)				(MMU_AS(as) + 0x4)
+#define AS_MEMATTR_LO(as)				(MMU_AS(as) + 0x8)
+#define AS_MEMATTR_HI(as)				(MMU_AS(as) + 0xC)
+#define   AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL		(2 << 2)
+#define   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r)	((3 << 2) | \
+							 ((w) ? BIT(0) : 0) | \
+							 ((r) ? BIT(1) : 0))
+#define   AS_MEMATTR_AARCH64_SH_MIDGARD_INNER		(0 << 4)
+#define   AS_MEMATTR_AARCH64_SH_CPU_INNER		(1 << 4)
+#define   AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH	(2 << 4)
+#define   AS_MEMATTR_AARCH64_SHARED			(0 << 6)
+#define   AS_MEMATTR_AARCH64_INNER_OUTER_NC		(1 << 6)
+#define   AS_MEMATTR_AARCH64_INNER_OUTER_WB		(2 << 6)
+#define   AS_MEMATTR_AARCH64_FAULT			(3 << 6)
+#define AS_LOCKADDR_LO(as)				(MMU_AS(as) + 0x10)
+#define AS_LOCKADDR_HI(as)				(MMU_AS(as) + 0x14)
+#define AS_COMMAND(as)					(MMU_AS(as) + 0x18)
+#define   AS_COMMAND_NOP				0
+#define   AS_COMMAND_UPDATE				1
+#define   AS_COMMAND_LOCK				2
+#define   AS_COMMAND_UNLOCK				3
+#define   AS_COMMAND_FLUSH_PT				4
+#define   AS_COMMAND_FLUSH_MEM				5
+#define   AS_LOCK_REGION_MIN_SIZE			(1ULL << 15)
+#define AS_FAULTSTATUS(as)				(MMU_AS(as) + 0x1C)
+#define  AS_FAULTSTATUS_ACCESS_TYPE_MASK		(0x3 << 8)
+#define  AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC		(0x0 << 8)
+#define  AS_FAULTSTATUS_ACCESS_TYPE_EX			(0x1 << 8)
+#define  AS_FAULTSTATUS_ACCESS_TYPE_READ		(0x2 << 8)
+#define  AS_FAULTSTATUS_ACCESS_TYPE_WRITE		(0x3 << 8)
+#define AS_FAULTADDRESS_LO(as)				(MMU_AS(as) + 0x20)
+#define AS_FAULTADDRESS_HI(as)				(MMU_AS(as) + 0x24)
+#define AS_STATUS(as)					(MMU_AS(as) + 0x28)
+#define   AS_STATUS_AS_ACTIVE				BIT(0)
+#define AS_TRANSCFG_LO(as)				(MMU_AS(as) + 0x30)
+#define AS_TRANSCFG_HI(as)				(MMU_AS(as) + 0x34)
+#define   AS_TRANSCFG_ADRMODE_UNMAPPED			(1 << 0)
+#define   AS_TRANSCFG_ADRMODE_IDENTITY			(2 << 0)
+#define   AS_TRANSCFG_ADRMODE_AARCH64_4K		(6 << 0)
+#define   AS_TRANSCFG_ADRMODE_AARCH64_64K		(8 << 0)
+#define   AS_TRANSCFG_INA_BITS(x)			((x) << 6)
+#define   AS_TRANSCFG_OUTA_BITS(x)			((x) << 14)
+#define   AS_TRANSCFG_SL_CONCAT				BIT(22)
+#define   AS_TRANSCFG_PTW_MEMATTR_NC			(1 << 24)
+#define   AS_TRANSCFG_PTW_MEMATTR_WB			(2 << 24)
+#define   AS_TRANSCFG_PTW_SH_NS				(0 << 28)
+#define   AS_TRANSCFG_PTW_SH_OS				(2 << 28)
+#define   AS_TRANSCFG_PTW_SH_IS				(3 << 28)
+#define   AS_TRANSCFG_PTW_RA				BIT(30)
+#define   AS_TRANSCFG_DISABLE_HIER_AP			BIT(33)
+#define   AS_TRANSCFG_DISABLE_AF_FAULT			BIT(34)
+#define   AS_TRANSCFG_WXN				BIT(35)
+#define   AS_TRANSCFG_XREADABLE				BIT(36)
+#define AS_FAULTEXTRA_LO(as)				(MMU_AS(as) + 0x38)
+#define AS_FAULTEXTRA_HI(as)				(MMU_AS(as) + 0x3C)
+
+#define CSF_GPU_LATEST_FLUSH_ID				0x10000
+
+#define CSF_DOORBELL(i)					(0x80000 + ((i) * 0x10000))
+#define CSF_GLB_DOORBELL_ID				0
+
+#define gpu_write(dev, reg, data) \
+	writel(data, (dev)->iomem + (reg))
+
+#define gpu_read(dev, reg) \
+	readl((dev)->iomem + (reg))
+
+#endif
--
2.42.0


From b2b686d6238e9da151c89d8998ab5aec0d80d0d7 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:17 +0100
Subject: [PATCH 04/69] [MERGED] drm/panthor: Add the device logical block

The panthor driver is designed in a modular way, where each logical
block is dealing with a specific HW-block or software feature. In order
for those blocks to communicate with each other, we need a central
panthor_device collecting all the blocks, and exposing some common
features, like interrupt handling, power management, reset, ...

This what this panthor_device logical block is about.

v6:
- Add Maxime's and Heiko's acks
- Keep header inclusion alphabetically ordered

v5:
- Suspend the MMU/GPU blocks if panthor_fw_resume() fails in
  panthor_device_resume()
- Move the pm_runtime_use_autosuspend() call before drm_dev_register()
- Add Liviu's R-b

v4:
- Check drmm_mutex_init() return code
- Fix panthor_device_reset_work() out path
- Fix the race in the unplug logic
- Fix typos
- Unplug blocks when something fails in panthor_device_init()
- Add Steve's R-b

v3:
- Add acks for the MIT+GPL2 relicensing
- Fix 32-bit support
- Shorten the sections protected by panthor_device::pm::mmio_lock to fix
  lock ordering issues.
- Rename panthor_device::pm::lock into panthor_device::pm::mmio_lock to
  better reflect what this lock is protecting
- Use dev_err_probe()
- Make sure we call drm_dev_exit() when something fails half-way in
  panthor_device_reset_work()
- Replace CSF_GPU_LATEST_FLUSH_ID_DEFAULT with a constant '1' and a
  comment to explain. Also remove setting the dummy flush ID on suspend.
- Remove drm_WARN_ON() in panthor_exception_name()
- Check pirq->suspended in panthor_xxx_irq_raw_handler()

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-4-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_device.c | 549 +++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_device.h | 394 ++++++++++++++++
 2 files changed, 943 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c
new file mode 100644
index 000000000000..bfe8da4a6e4c
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
+#include <linux/reset.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include "panthor_devfreq.h"
+#include "panthor_device.h"
+#include "panthor_fw.h"
+#include "panthor_gpu.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+#include "panthor_sched.h"
+
+static int panthor_clk_init(struct panthor_device *ptdev)
+{
+	ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL);
+	if (IS_ERR(ptdev->clks.core))
+		return dev_err_probe(ptdev->base.dev,
+				     PTR_ERR(ptdev->clks.core),
+				     "get 'core' clock failed");
+
+	ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks");
+	if (IS_ERR(ptdev->clks.stacks))
+		return dev_err_probe(ptdev->base.dev,
+				     PTR_ERR(ptdev->clks.stacks),
+				     "get 'stacks' clock failed");
+
+	ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup");
+	if (IS_ERR(ptdev->clks.coregroup))
+		return dev_err_probe(ptdev->base.dev,
+				     PTR_ERR(ptdev->clks.coregroup),
+				     "get 'coregroup' clock failed");
+
+	drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core));
+	return 0;
+}
+
+void panthor_device_unplug(struct panthor_device *ptdev)
+{
+	/* This function can be called from two different path: the reset work
+	 * and the platform device remove callback. drm_dev_unplug() doesn't
+	 * deal with concurrent callers, so we have to protect drm_dev_unplug()
+	 * calls with our own lock, and bail out if the device is already
+	 * unplugged.
+	 */
+	mutex_lock(&ptdev->unplug.lock);
+	if (drm_dev_is_unplugged(&ptdev->base)) {
+		/* Someone beat us, release the lock and wait for the unplug
+		 * operation to be reported as done.
+		 **/
+		mutex_unlock(&ptdev->unplug.lock);
+		wait_for_completion(&ptdev->unplug.done);
+		return;
+	}
+
+	/* Call drm_dev_unplug() so any access to HW blocks happening after
+	 * that point get rejected.
+	 */
+	drm_dev_unplug(&ptdev->base);
+
+	/* We do the rest of the unplug with the unplug lock released,
+	 * future callers will wait on ptdev->unplug.done anyway.
+	 */
+	mutex_unlock(&ptdev->unplug.lock);
+
+	drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0);
+
+	/* Now, try to cleanly shutdown the GPU before the device resources
+	 * get reclaimed.
+	 */
+	panthor_sched_unplug(ptdev);
+	panthor_fw_unplug(ptdev);
+	panthor_mmu_unplug(ptdev);
+	panthor_gpu_unplug(ptdev);
+
+	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
+	pm_runtime_put_sync_suspend(ptdev->base.dev);
+
+	/* Report the unplug operation as done to unblock concurrent
+	 * panthor_device_unplug() callers.
+	 */
+	complete_all(&ptdev->unplug.done);
+}
+
+static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+
+	cancel_work_sync(&ptdev->reset.work);
+	destroy_workqueue(ptdev->reset.wq);
+}
+
+static void panthor_device_reset_work(struct work_struct *work)
+{
+	struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work);
+	int ret = 0, cookie;
+
+	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) {
+		/*
+		 * No need for a reset as the device has been (or will be)
+		 * powered down
+		 */
+		atomic_set(&ptdev->reset.pending, 0);
+		return;
+	}
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return;
+
+	panthor_sched_pre_reset(ptdev);
+	panthor_fw_pre_reset(ptdev, true);
+	panthor_mmu_pre_reset(ptdev);
+	panthor_gpu_soft_reset(ptdev);
+	panthor_gpu_l2_power_on(ptdev);
+	panthor_mmu_post_reset(ptdev);
+	ret = panthor_fw_post_reset(ptdev);
+	if (ret)
+		goto out_dev_exit;
+
+	atomic_set(&ptdev->reset.pending, 0);
+	panthor_sched_post_reset(ptdev);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+
+	if (ret) {
+		panthor_device_unplug(ptdev);
+		drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable.");
+	}
+}
+
+static bool panthor_device_is_initialized(struct panthor_device *ptdev)
+{
+	return !!ptdev->scheduler;
+}
+
+static void panthor_device_free_page(struct drm_device *ddev, void *data)
+{
+	free_page((unsigned long)data);
+}
+
+int panthor_device_init(struct panthor_device *ptdev)
+{
+	struct resource *res;
+	struct page *p;
+	int ret;
+
+	ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT;
+
+	init_completion(&ptdev->unplug.done);
+	ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock);
+	if (ret)
+		return ret;
+
+	ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock);
+	if (ret)
+		return ret;
+
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+	p = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	ptdev->pm.dummy_latest_flush = page_address(p);
+	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page,
+				       ptdev->pm.dummy_latest_flush);
+	if (ret)
+		return ret;
+
+	/*
+	 * Set the dummy page holding the latest flush to 1. This will cause the
+	 * flush to avoided as we know it isn't necessary if the submission
+	 * happens while the dummy page is mapped. Zero cannot be used because
+	 * that means 'always flush'.
+	 */
+	*ptdev->pm.dummy_latest_flush = 1;
+
+	INIT_WORK(&ptdev->reset.work, panthor_device_reset_work);
+	ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0);
+	if (!ptdev->reset.wq)
+		return -ENOMEM;
+
+	ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL);
+	if (ret)
+		return ret;
+
+	ret = panthor_clk_init(ptdev);
+	if (ret)
+		return ret;
+
+	ret = panthor_devfreq_init(ptdev);
+	if (ret)
+		return ret;
+
+	ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev),
+							      0, &res);
+	if (IS_ERR(ptdev->iomem))
+		return PTR_ERR(ptdev->iomem);
+
+	ptdev->phys_addr = res->start;
+
+	ret = devm_pm_runtime_enable(ptdev->base.dev);
+	if (ret)
+		return ret;
+
+	ret = pm_runtime_resume_and_get(ptdev->base.dev);
+	if (ret)
+		return ret;
+
+	ret = panthor_gpu_init(ptdev);
+	if (ret)
+		goto err_rpm_put;
+
+	ret = panthor_mmu_init(ptdev);
+	if (ret)
+		goto err_unplug_gpu;
+
+	ret = panthor_fw_init(ptdev);
+	if (ret)
+		goto err_unplug_mmu;
+
+	ret = panthor_sched_init(ptdev);
+	if (ret)
+		goto err_unplug_fw;
+
+	/* ~3 frames */
+	pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
+	pm_runtime_use_autosuspend(ptdev->base.dev);
+
+	ret = drm_dev_register(&ptdev->base, 0);
+	if (ret)
+		goto err_disable_autosuspend;
+
+	pm_runtime_put_autosuspend(ptdev->base.dev);
+	return 0;
+
+err_disable_autosuspend:
+	pm_runtime_dont_use_autosuspend(ptdev->base.dev);
+	panthor_sched_unplug(ptdev);
+
+err_unplug_fw:
+	panthor_fw_unplug(ptdev);
+
+err_unplug_mmu:
+	panthor_mmu_unplug(ptdev);
+
+err_unplug_gpu:
+	panthor_gpu_unplug(ptdev);
+
+err_rpm_put:
+	pm_runtime_put_sync_suspend(ptdev->base.dev);
+	return ret;
+}
+
+#define PANTHOR_EXCEPTION(id) \
+	[DRM_PANTHOR_EXCEPTION_ ## id] = { \
+		.name = #id, \
+	}
+
+struct panthor_exception_info {
+	const char *name;
+};
+
+static const struct panthor_exception_info panthor_exception_infos[] = {
+	PANTHOR_EXCEPTION(OK),
+	PANTHOR_EXCEPTION(TERMINATED),
+	PANTHOR_EXCEPTION(KABOOM),
+	PANTHOR_EXCEPTION(EUREKA),
+	PANTHOR_EXCEPTION(ACTIVE),
+	PANTHOR_EXCEPTION(CS_RES_TERM),
+	PANTHOR_EXCEPTION(CS_CONFIG_FAULT),
+	PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT),
+	PANTHOR_EXCEPTION(CS_BUS_FAULT),
+	PANTHOR_EXCEPTION(CS_INSTR_INVALID),
+	PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW),
+	PANTHOR_EXCEPTION(CS_INHERIT_FAULT),
+	PANTHOR_EXCEPTION(INSTR_INVALID_PC),
+	PANTHOR_EXCEPTION(INSTR_INVALID_ENC),
+	PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT),
+	PANTHOR_EXCEPTION(DATA_INVALID_FAULT),
+	PANTHOR_EXCEPTION(TILE_RANGE_FAULT),
+	PANTHOR_EXCEPTION(ADDR_RANGE_FAULT),
+	PANTHOR_EXCEPTION(IMPRECISE_FAULT),
+	PANTHOR_EXCEPTION(OOM),
+	PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR),
+	PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT),
+	PANTHOR_EXCEPTION(GPU_BUS_FAULT),
+	PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT),
+	PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT),
+	PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_0),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_1),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_2),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_3),
+	PANTHOR_EXCEPTION(TRANSLATION_FAULT_4),
+	PANTHOR_EXCEPTION(PERM_FAULT_0),
+	PANTHOR_EXCEPTION(PERM_FAULT_1),
+	PANTHOR_EXCEPTION(PERM_FAULT_2),
+	PANTHOR_EXCEPTION(PERM_FAULT_3),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_1),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_2),
+	PANTHOR_EXCEPTION(ACCESS_FLAG_3),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2),
+	PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2),
+	PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3),
+};
+
+const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code)
+{
+	if (exception_code >= ARRAY_SIZE(panthor_exception_infos) ||
+	    !panthor_exception_infos[exception_code].name)
+		return "Unknown exception type";
+
+	return panthor_exception_infos[exception_code].name;
+}
+
+static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct panthor_device *ptdev = vma->vm_private_data;
+	u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	unsigned long pfn;
+	pgprot_t pgprot;
+	vm_fault_t ret;
+	bool active;
+	int cookie;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return VM_FAULT_SIGBUS;
+
+	mutex_lock(&ptdev->pm.mmio_lock);
+	active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE;
+
+	switch (panthor_device_mmio_offset(id)) {
+	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
+		if (active)
+			pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID);
+		else
+			pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush);
+		break;
+
+	default:
+		ret = VM_FAULT_SIGBUS;
+		goto out_unlock;
+	}
+
+	pgprot = vma->vm_page_prot;
+	if (active)
+		pgprot = pgprot_noncached(pgprot);
+
+	ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot);
+
+out_unlock:
+	mutex_unlock(&ptdev->pm.mmio_lock);
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static const struct vm_operations_struct panthor_mmio_vm_ops = {
+	.fault = panthor_mmio_vm_fault,
+};
+
+int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma)
+{
+	u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT;
+
+	switch (panthor_device_mmio_offset(id)) {
+	case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET:
+		if (vma->vm_end - vma->vm_start != PAGE_SIZE ||
+		    (vma->vm_flags & (VM_WRITE | VM_EXEC)))
+			return -EINVAL;
+
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/* Defer actual mapping to the fault handler. */
+	vma->vm_private_data = ptdev;
+	vma->vm_ops = &panthor_mmio_vm_ops;
+	vm_flags_set(vma,
+		     VM_IO | VM_DONTCOPY | VM_DONTEXPAND |
+		     VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+int panthor_device_resume(struct device *dev)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	int ret, cookie;
+
+	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED)
+		return -EINVAL;
+
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING);
+
+	ret = clk_prepare_enable(ptdev->clks.core);
+	if (ret)
+		goto err_set_suspended;
+
+	ret = clk_prepare_enable(ptdev->clks.stacks);
+	if (ret)
+		goto err_disable_core_clk;
+
+	ret = clk_prepare_enable(ptdev->clks.coregroup);
+	if (ret)
+		goto err_disable_stacks_clk;
+
+	ret = panthor_devfreq_resume(ptdev);
+	if (ret)
+		goto err_disable_coregroup_clk;
+
+	if (panthor_device_is_initialized(ptdev) &&
+	    drm_dev_enter(&ptdev->base, &cookie)) {
+		panthor_gpu_resume(ptdev);
+		panthor_mmu_resume(ptdev);
+		ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
+		if (!ret) {
+			panthor_sched_resume(ptdev);
+		} else {
+			panthor_mmu_suspend(ptdev);
+			panthor_gpu_suspend(ptdev);
+		}
+
+		drm_dev_exit(cookie);
+
+		if (ret)
+			goto err_suspend_devfreq;
+	}
+
+	if (atomic_read(&ptdev->reset.pending))
+		queue_work(ptdev->reset.wq, &ptdev->reset.work);
+
+	/* Clear all IOMEM mappings pointing to this device after we've
+	 * resumed. This way the fake mappings pointing to the dummy pages
+	 * are removed and the real iomem mapping will be restored on next
+	 * access.
+	 */
+	mutex_lock(&ptdev->pm.mmio_lock);
+	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
+			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
+	mutex_unlock(&ptdev->pm.mmio_lock);
+	return 0;
+
+err_suspend_devfreq:
+	panthor_devfreq_suspend(ptdev);
+
+err_disable_coregroup_clk:
+	clk_disable_unprepare(ptdev->clks.coregroup);
+
+err_disable_stacks_clk:
+	clk_disable_unprepare(ptdev->clks.stacks);
+
+err_disable_core_clk:
+	clk_disable_unprepare(ptdev->clks.core);
+
+err_set_suspended:
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+	return ret;
+}
+
+int panthor_device_suspend(struct device *dev)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	int ret, cookie;
+
+	if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE)
+		return -EINVAL;
+
+	/* Clear all IOMEM mappings pointing to this device before we
+	 * shutdown the power-domain and clocks. Failing to do that results
+	 * in external aborts when the process accesses the iomem region.
+	 * We change the state and call unmap_mapping_range() with the
+	 * mmio_lock held to make sure the vm_fault handler won't set up
+	 * invalid mappings.
+	 */
+	mutex_lock(&ptdev->pm.mmio_lock);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING);
+	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
+			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
+	mutex_unlock(&ptdev->pm.mmio_lock);
+
+	if (panthor_device_is_initialized(ptdev) &&
+	    drm_dev_enter(&ptdev->base, &cookie)) {
+		cancel_work_sync(&ptdev->reset.work);
+
+		/* We prepare everything as if we were resetting the GPU.
+		 * The end of the reset will happen in the resume path though.
+		 */
+		panthor_sched_suspend(ptdev);
+		panthor_fw_suspend(ptdev);
+		panthor_mmu_suspend(ptdev);
+		panthor_gpu_suspend(ptdev);
+		drm_dev_exit(cookie);
+	}
+
+	ret = panthor_devfreq_suspend(ptdev);
+	if (ret) {
+		if (panthor_device_is_initialized(ptdev) &&
+		    drm_dev_enter(&ptdev->base, &cookie)) {
+			panthor_gpu_resume(ptdev);
+			panthor_mmu_resume(ptdev);
+			drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev));
+			panthor_sched_resume(ptdev);
+			drm_dev_exit(cookie);
+		}
+
+		goto err_set_active;
+	}
+
+	clk_disable_unprepare(ptdev->clks.coregroup);
+	clk_disable_unprepare(ptdev->clks.stacks);
+	clk_disable_unprepare(ptdev->clks.core);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED);
+	return 0;
+
+err_set_active:
+	/* If something failed and we have to revert back to an
+	 * active state, we also need to clear the MMIO userspace
+	 * mappings, so any dumb pages that were mapped while we
+	 * were trying to suspend gets invalidated.
+	 */
+	mutex_lock(&ptdev->pm.mmio_lock);
+	atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE);
+	unmap_mapping_range(ptdev->base.anon_inode->i_mapping,
+			    DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1);
+	mutex_unlock(&ptdev->pm.mmio_lock);
+	return ret;
+}
+#endif
diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h
new file mode 100644
index 000000000000..51c9d61b6796
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_DEVICE_H__
+#define __PANTHOR_DEVICE_H__
+
+#include <linux/atomic.h>
+#include <linux/io-pgtable.h>
+#include <linux/regulator/consumer.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_device.h>
+#include <drm/drm_mm.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+struct panthor_csf;
+struct panthor_csf_ctx;
+struct panthor_device;
+struct panthor_gpu;
+struct panthor_group_pool;
+struct panthor_heap_pool;
+struct panthor_job;
+struct panthor_mmu;
+struct panthor_fw;
+struct panthor_perfcnt;
+struct panthor_vm;
+struct panthor_vm_pool;
+
+/**
+ * enum panthor_device_pm_state - PM state
+ */
+enum panthor_device_pm_state {
+	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */
+	PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0,
+
+	/** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */
+	PANTHOR_DEVICE_PM_STATE_RESUMING,
+
+	/** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */
+	PANTHOR_DEVICE_PM_STATE_ACTIVE,
+
+	/** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */
+	PANTHOR_DEVICE_PM_STATE_SUSPENDING,
+};
+
+/**
+ * struct panthor_irq - IRQ data
+ *
+ * Used to automate IRQ handling for the 3 different IRQs we have in this driver.
+ */
+struct panthor_irq {
+	/** @ptdev: Panthor device */
+	struct panthor_device *ptdev;
+
+	/** @irq: IRQ number. */
+	int irq;
+
+	/** @mask: Current mask being applied to xxx_INT_MASK. */
+	u32 mask;
+
+	/** @suspended: Set to true when the IRQ is suspended. */
+	atomic_t suspended;
+};
+
+/**
+ * struct panthor_device - Panthor device
+ */
+struct panthor_device {
+	/** @base: Base drm_device. */
+	struct drm_device base;
+
+	/** @phys_addr: Physical address of the iomem region. */
+	phys_addr_t phys_addr;
+
+	/** @iomem: CPU mapping of the IOMEM region. */
+	void __iomem *iomem;
+
+	/** @clks: GPU clocks. */
+	struct {
+		/** @core: Core clock. */
+		struct clk *core;
+
+		/** @stacks: Stacks clock. This clock is optional. */
+		struct clk *stacks;
+
+		/** @coregroup: Core group clock. This clock is optional. */
+		struct clk *coregroup;
+	} clks;
+
+	/** @coherent: True if the CPU/GPU are memory coherent. */
+	bool coherent;
+
+	/** @gpu_info: GPU information. */
+	struct drm_panthor_gpu_info gpu_info;
+
+	/** @csif_info: Command stream interface information. */
+	struct drm_panthor_csif_info csif_info;
+
+	/** @gpu: GPU management data. */
+	struct panthor_gpu *gpu;
+
+	/** @fw: FW management data. */
+	struct panthor_fw *fw;
+
+	/** @mmu: MMU management data. */
+	struct panthor_mmu *mmu;
+
+	/** @scheduler: Scheduler management data. */
+	struct panthor_scheduler *scheduler;
+
+	/** @devfreq: Device frequency scaling management data. */
+	struct panthor_devfreq *devfreq;
+
+	/** @unplug: Device unplug related fields. */
+	struct {
+		/** @lock: Lock used to serialize unplug operations. */
+		struct mutex lock;
+
+		/**
+		 * @done: Completion object signaled when the unplug
+		 * operation is done.
+		 */
+		struct completion done;
+	} unplug;
+
+	/** @reset: Reset related fields. */
+	struct {
+		/** @wq: Ordered worqueud used to schedule reset operations. */
+		struct workqueue_struct *wq;
+
+		/** @work: Reset work. */
+		struct work_struct work;
+
+		/** @pending: Set to true if a reset is pending. */
+		atomic_t pending;
+	} reset;
+
+	/** @pm: Power management related data. */
+	struct {
+		/** @state: Power state. */
+		atomic_t state;
+
+		/**
+		 * @mmio_lock: Lock protecting MMIO userspace CPU mappings.
+		 *
+		 * This is needed to ensure we map the dummy IO pages when
+		 * the device is being suspended, and the real IO pages when
+		 * the device is being resumed. We can't just do with the
+		 * state atomicity to deal with this race.
+		 */
+		struct mutex mmio_lock;
+
+		/**
+		 * @dummy_latest_flush: Dummy LATEST_FLUSH page.
+		 *
+		 * Used to replace the real LATEST_FLUSH page when the GPU
+		 * is suspended.
+		 */
+		u32 *dummy_latest_flush;
+	} pm;
+};
+
+/**
+ * struct panthor_file - Panthor file
+ */
+struct panthor_file {
+	/** @ptdev: Device attached to this file. */
+	struct panthor_device *ptdev;
+
+	/** @vms: VM pool attached to this file. */
+	struct panthor_vm_pool *vms;
+
+	/** @groups: Scheduling group pool attached to this file. */
+	struct panthor_group_pool *groups;
+};
+
+int panthor_device_init(struct panthor_device *ptdev);
+void panthor_device_unplug(struct panthor_device *ptdev);
+
+/**
+ * panthor_device_schedule_reset() - Schedules a reset operation
+ */
+static inline void panthor_device_schedule_reset(struct panthor_device *ptdev)
+{
+	if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) &&
+	    atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE)
+		queue_work(ptdev->reset.wq, &ptdev->reset.work);
+}
+
+/**
+ * panthor_device_reset_is_pending() - Checks if a reset is pending.
+ *
+ * Return: true if a reset is pending, false otherwise.
+ */
+static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev)
+{
+	return atomic_read(&ptdev->reset.pending) != 0;
+}
+
+int panthor_device_mmap_io(struct panthor_device *ptdev,
+			   struct vm_area_struct *vma);
+
+int panthor_device_resume(struct device *dev);
+int panthor_device_suspend(struct device *dev);
+
+enum drm_panthor_exception_type {
+	DRM_PANTHOR_EXCEPTION_OK = 0x00,
+	DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04,
+	DRM_PANTHOR_EXCEPTION_KABOOM = 0x05,
+	DRM_PANTHOR_EXCEPTION_EUREKA = 0x06,
+	DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08,
+	DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f,
+	DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f,
+	DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40,
+	DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44,
+	DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48,
+	DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49,
+	DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a,
+	DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b,
+	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50,
+	DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51,
+	DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55,
+	DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58,
+	DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59,
+	DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a,
+	DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b,
+	DRM_PANTHOR_EXCEPTION_OOM = 0x60,
+	DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68,
+	DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69,
+	DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80,
+	DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88,
+	DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89,
+	DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3,
+	DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca,
+	DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda,
+	DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6,
+	DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea,
+	DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb,
+};
+
+/**
+ * panthor_exception_is_fault() - Checks if an exception is a fault.
+ *
+ * Return: true if the exception is a fault, false otherwise.
+ */
+static inline bool
+panthor_exception_is_fault(u32 exception_code)
+{
+	return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT;
+}
+
+const char *panthor_exception_name(struct panthor_device *ptdev,
+				   u32 exception_code);
+
+/**
+ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt
+ * registration function.
+ *
+ * The boiler-plate to gracefully deal with shared interrupts is
+ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER()
+ * just after the actual handler. The handler prototype is:
+ *
+ * void (*handler)(struct panthor_device *, u32 status);
+ */
+#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler)					\
+static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data)			\
+{												\
+	struct panthor_irq *pirq = data;							\
+	struct panthor_device *ptdev = pirq->ptdev;						\
+												\
+	if (atomic_read(&pirq->suspended))							\
+		return IRQ_NONE;								\
+	if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT))					\
+		return IRQ_NONE;								\
+												\
+	gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0);						\
+	return IRQ_WAKE_THREAD;									\
+}												\
+												\
+static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data)		\
+{												\
+	struct panthor_irq *pirq = data;							\
+	struct panthor_device *ptdev = pirq->ptdev;						\
+	irqreturn_t ret = IRQ_NONE;								\
+												\
+	while (true) {										\
+		u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask;	\
+												\
+		if (!status)									\
+			break;									\
+												\
+		gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status);				\
+												\
+		__handler(ptdev, status);							\
+		ret = IRQ_HANDLED;								\
+	}											\
+												\
+	if (!atomic_read(&pirq->suspended))							\
+		gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask);			\
+												\
+	return ret;										\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq)			\
+{												\
+	int cookie;										\
+												\
+	atomic_set(&pirq->suspended, true);							\
+												\
+	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0);				\
+		synchronize_irq(pirq->irq);							\
+		drm_dev_exit(cookie);								\
+	}											\
+												\
+	pirq->mask = 0;										\
+}												\
+												\
+static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask)	\
+{												\
+	int cookie;										\
+												\
+	atomic_set(&pirq->suspended, false);							\
+	pirq->mask = mask;									\
+												\
+	if (drm_dev_enter(&pirq->ptdev->base, &cookie)) {					\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask);			\
+		gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask);			\
+		drm_dev_exit(cookie);								\
+	}											\
+}												\
+												\
+static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev,			\
+					      struct panthor_irq *pirq,				\
+					      int irq, u32 mask)				\
+{												\
+	pirq->ptdev = ptdev;									\
+	pirq->irq = irq;									\
+	panthor_ ## __name ## _irq_resume(pirq, mask);						\
+												\
+	return devm_request_threaded_irq(ptdev->base.dev, irq,					\
+					 panthor_ ## __name ## _irq_raw_handler,		\
+					 panthor_ ## __name ## _irq_threaded_handler,		\
+					 IRQF_SHARED, KBUILD_MODNAME "-" # __name,		\
+					 pirq);							\
+}
+
+/**
+ * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one
+ * @offset: Offset to convert.
+ *
+ * With 32-bit systems being limited by the 32-bit representation of mmap2's
+ * pgoffset field, we need to make the MMIO offset arch specific. This function
+ * converts a user MMIO offset into something the kernel driver understands.
+ *
+ * If the kernel and userspace architecture match, the offset is unchanged. If
+ * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match
+ * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible.
+ *
+ * Return: Adjusted offset.
+ */
+static inline u64 panthor_device_mmio_offset(u64 offset)
+{
+#ifdef CONFIG_ARM64
+	if (test_tsk_thread_flag(current, TIF_32BIT))
+		offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT;
+#endif
+
+	return offset;
+}
+
+extern struct workqueue_struct *panthor_cleanup_wq;
+
+#endif
--
2.42.0


From b16609612ab80de1fa27fea42e5a36f269d808e5 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:18 +0100
Subject: [PATCH 05/69] [MERGED] drm/panthor: Add the GPU logical block

Handles everything that's not related to the FW, the MMU or the
scheduler. This is the block dealing with the GPU property retrieval,
the GPU block power on/off logic, and some global operations, like
global cache flushing.

v6:
- Add Maxime's and Heiko's acks

v5:
- Fix GPU_MODEL() kernel doc
- Fix test in panthor_gpu_block_power_off()
- Add Steve's R-b

v4:
- Expose CORE_FEATURES through DEV_QUERY

v3:
- Add acks for the MIT/GPL2 relicensing
- Use macros to extract GPU ID info
- Make sure we reset clear pending_reqs bits when wait_event_timeout()
  times out but the corresponding bit is cleared in GPU_INT_RAWSTAT
  (can happen if the IRQ is masked or HW takes to long to call the IRQ
  handler)
- GPU_MODEL now takes separate arch and product majors to be more
  readable.
- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask.
- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are
  not updated for protected interrupts).
- Minor code tidy ups

Cc: Alexey Sheplyakov <asheplyakov@basealt.ru> # MIT+GPL2 relicensing
Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-5-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_gpu.h |  52 +++
 2 files changed, 534 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c
new file mode 100644
index 000000000000..6dbbc4cfbe7e
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gpu.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include "panthor_device.h"
+#include "panthor_gpu.h"
+#include "panthor_regs.h"
+
+/**
+ * struct panthor_gpu - GPU block management data.
+ */
+struct panthor_gpu {
+	/** @irq: GPU irq. */
+	struct panthor_irq irq;
+
+	/** @reqs_lock: Lock protecting access to pending_reqs. */
+	spinlock_t reqs_lock;
+
+	/** @pending_reqs: Pending GPU requests. */
+	u32 pending_reqs;
+
+	/** @reqs_acked: GPU request wait queue. */
+	wait_queue_head_t reqs_acked;
+};
+
+/**
+ * struct panthor_model - GPU model description
+ */
+struct panthor_model {
+	/** @name: Model name. */
+	const char *name;
+
+	/** @arch_major: Major version number of architecture. */
+	u8 arch_major;
+
+	/** @product_major: Major version number of product. */
+	u8 product_major;
+};
+
+/**
+ * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified
+ * by a combination of the major architecture version and the major product
+ * version.
+ * @_name: Name for the GPU model.
+ * @_arch_major: Architecture major.
+ * @_product_major: Product major.
+ */
+#define GPU_MODEL(_name, _arch_major, _product_major) \
+{\
+	.name = __stringify(_name),				\
+	.arch_major = _arch_major,				\
+	.product_major = _product_major,			\
+}
+
+static const struct panthor_model gpu_models[] = {
+	GPU_MODEL(g610, 10, 7),
+	{},
+};
+
+#define GPU_INTERRUPTS_MASK	\
+	(GPU_IRQ_FAULT | \
+	 GPU_IRQ_PROTM_FAULT | \
+	 GPU_IRQ_RESET_COMPLETED | \
+	 GPU_IRQ_CLEAN_CACHES_COMPLETED)
+
+static void panthor_gpu_init_info(struct panthor_device *ptdev)
+{
+	const struct panthor_model *model;
+	u32 arch_major, product_major;
+	u32 major, minor, status;
+	unsigned int i;
+
+	ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID);
+	ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID);
+	ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID);
+	ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES);
+	ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES);
+	ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES);
+	ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES);
+	ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES);
+	ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES);
+	ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS);
+	ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE);
+	ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE);
+	ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES);
+	for (i = 0; i < 4; i++)
+		ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i));
+
+	ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT);
+
+	ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO);
+	ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32;
+
+	ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO);
+	ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32;
+
+	ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO);
+	ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32;
+
+	arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id);
+	product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id);
+	major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id);
+	minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id);
+	status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id);
+
+	for (model = gpu_models; model->name; model++) {
+		if (model->arch_major == arch_major &&
+		    model->product_major == product_major)
+			break;
+	}
+
+	drm_info(&ptdev->base,
+		 "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x",
+		 model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16,
+		 major, minor, status);
+
+	drm_info(&ptdev->base,
+		 "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x",
+		 ptdev->gpu_info.l2_features,
+		 ptdev->gpu_info.tiler_features,
+		 ptdev->gpu_info.mem_features,
+		 ptdev->gpu_info.mmu_features,
+		 ptdev->gpu_info.as_present);
+
+	drm_info(&ptdev->base,
+		 "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx",
+		 ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present,
+		 ptdev->gpu_info.tiler_present);
+}
+
+static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status)
+{
+	if (status & GPU_IRQ_FAULT) {
+		u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS);
+		u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) |
+			      gpu_read(ptdev, GPU_FAULT_ADDR_LO);
+
+		drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n",
+			 fault_status, panthor_exception_name(ptdev, fault_status & 0xFF),
+			 address);
+	}
+	if (status & GPU_IRQ_PROTM_FAULT)
+		drm_warn(&ptdev->base, "GPU Fault in protected mode\n");
+
+	spin_lock(&ptdev->gpu->reqs_lock);
+	if (status & ptdev->gpu->pending_reqs) {
+		ptdev->gpu->pending_reqs &= ~status;
+		wake_up_all(&ptdev->gpu->reqs_acked);
+	}
+	spin_unlock(&ptdev->gpu->reqs_lock);
+}
+PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler);
+
+/**
+ * panthor_gpu_unplug() - Called when the GPU is unplugged.
+ * @ptdev: Device to unplug.
+ */
+void panthor_gpu_unplug(struct panthor_device *ptdev)
+{
+	unsigned long flags;
+
+	/* Make sure the IRQ handler is not running after that point. */
+	panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+
+	/* Wake-up all waiters. */
+	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+	ptdev->gpu->pending_reqs = 0;
+	wake_up_all(&ptdev->gpu->reqs_acked);
+	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+}
+
+/**
+ * panthor_gpu_init() - Initialize the GPU block
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_init(struct panthor_device *ptdev)
+{
+	struct panthor_gpu *gpu;
+	u32 pa_bits;
+	int ret, irq;
+
+	gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL);
+	if (!gpu)
+		return -ENOMEM;
+
+	spin_lock_init(&gpu->reqs_lock);
+	init_waitqueue_head(&gpu->reqs_acked);
+	ptdev->gpu = gpu;
+	panthor_gpu_init_info(ptdev);
+
+	dma_set_max_seg_size(ptdev->base.dev, UINT_MAX);
+	pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features);
+	ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits));
+	if (ret)
+		return ret;
+
+	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu");
+	if (irq <= 0)
+		return ret;
+
+	ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU
+ * @ptdev: Device.
+ * @blk_name: Block name.
+ * @pwroff_reg: Power-off register for this block.
+ * @pwrtrans_reg: Power transition register for this block.
+ * @mask: Sub-elements to power-off.
+ * @timeout_us: Timeout in microseconds.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_block_power_off(struct panthor_device *ptdev,
+				const char *blk_name,
+				u32 pwroff_reg, u32 pwrtrans_reg,
+				u64 mask, u32 timeout_us)
+{
+	u32 val, i;
+	int ret;
+
+	for (i = 0; i < 2; i++) {
+		u32 mask32 = mask >> (i * 32);
+
+		if (!mask32)
+			continue;
+
+		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+						 val, !(mask32 & val),
+						 100, timeout_us);
+		if (ret) {
+			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+				blk_name, mask);
+			return ret;
+		}
+	}
+
+	if (mask & GENMASK(31, 0))
+		gpu_write(ptdev, pwroff_reg, mask);
+
+	if (mask >> 32)
+		gpu_write(ptdev, pwroff_reg + 4, mask >> 32);
+
+	for (i = 0; i < 2; i++) {
+		u32 mask32 = mask >> (i * 32);
+
+		if (!mask32)
+			continue;
+
+		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+						 val, !(mask32 & val),
+						 100, timeout_us);
+		if (ret) {
+			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+				blk_name, mask);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU
+ * @ptdev: Device.
+ * @blk_name: Block name.
+ * @pwron_reg: Power-on register for this block.
+ * @pwrtrans_reg: Power transition register for this block.
+ * @rdy_reg: Power transition ready register.
+ * @mask: Sub-elements to power-on.
+ * @timeout_us: Timeout in microseconds.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_block_power_on(struct panthor_device *ptdev,
+			       const char *blk_name,
+			       u32 pwron_reg, u32 pwrtrans_reg,
+			       u32 rdy_reg, u64 mask, u32 timeout_us)
+{
+	u32 val, i;
+	int ret;
+
+	for (i = 0; i < 2; i++) {
+		u32 mask32 = mask >> (i * 32);
+
+		if (!mask32)
+			continue;
+
+		ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4),
+						 val, !(mask32 & val),
+						 100, timeout_us);
+		if (ret) {
+			drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition",
+				blk_name, mask);
+			return ret;
+		}
+	}
+
+	if (mask & GENMASK(31, 0))
+		gpu_write(ptdev, pwron_reg, mask);
+
+	if (mask >> 32)
+		gpu_write(ptdev, pwron_reg + 4, mask >> 32);
+
+	for (i = 0; i < 2; i++) {
+		u32 mask32 = mask >> (i * 32);
+
+		if (!mask32)
+			continue;
+
+		ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4),
+						 val, (mask32 & val) == mask32,
+						 100, timeout_us);
+		if (ret) {
+			drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness",
+				blk_name, mask);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_gpu_l2_power_on() - Power-on the L2-cache
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_l2_power_on(struct panthor_device *ptdev)
+{
+	if (ptdev->gpu_info.l2_present != 1) {
+		/*
+		 * Only support one core group now.
+		 * ~(l2_present - 1) unsets all bits in l2_present except
+		 * the bottom bit. (l2_present - 2) has all the bits in
+		 * the first core group set. AND them together to generate
+		 * a mask of cores in the first core group.
+		 */
+		u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) &
+				(ptdev->gpu_info.l2_present - 2);
+		drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n",
+			      hweight64(core_mask),
+			      hweight64(ptdev->gpu_info.shader_present));
+	}
+
+	return panthor_gpu_power_on(ptdev, L2, 1, 20000);
+}
+
+/**
+ * panthor_gpu_flush_caches() - Flush caches
+ * @ptdev: Device.
+ * @l2: L2 flush type.
+ * @lsc: LSC flush type.
+ * @other: Other flush type.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_flush_caches(struct panthor_device *ptdev,
+			     u32 l2, u32 lsc, u32 other)
+{
+	bool timedout = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+	if (!drm_WARN_ON(&ptdev->base,
+			 ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) {
+		ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED;
+		gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other));
+	}
+	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+
+	if (!wait_event_timeout(ptdev->gpu->reqs_acked,
+				!(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED),
+				msecs_to_jiffies(100))) {
+		spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+		if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 &&
+		    !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED))
+			timedout = true;
+		else
+			ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED;
+		spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+	}
+
+	if (timedout) {
+		drm_err(&ptdev->base, "Flush caches timeout");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_gpu_soft_reset() - Issue a soft-reset
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_gpu_soft_reset(struct panthor_device *ptdev)
+{
+	bool timedout = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+	if (!drm_WARN_ON(&ptdev->base,
+			 ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) {
+		ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED;
+		gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED);
+		gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET);
+	}
+	spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+
+	if (!wait_event_timeout(ptdev->gpu->reqs_acked,
+				!(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED),
+				msecs_to_jiffies(100))) {
+		spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags);
+		if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 &&
+		    !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED))
+			timedout = true;
+		else
+			ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED;
+		spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags);
+	}
+
+	if (timedout) {
+		drm_err(&ptdev->base, "Soft reset timeout");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_gpu_suspend() - Suspend the GPU block.
+ * @ptdev: Device.
+ *
+ * Suspend the GPU irq. This should be called last in the suspend procedure,
+ * after all other blocks have been suspented.
+ */
+void panthor_gpu_suspend(struct panthor_device *ptdev)
+{
+	/*
+	 * It may be preferable to simply power down the L2, but for now just
+	 * soft-reset which will leave the L2 powered down.
+	 */
+	panthor_gpu_soft_reset(ptdev);
+	panthor_gpu_irq_suspend(&ptdev->gpu->irq);
+}
+
+/**
+ * panthor_gpu_resume() - Resume the GPU block.
+ * @ptdev: Device.
+ *
+ * Resume the IRQ handler and power-on the L2-cache.
+ * The FW takes care of powering the other blocks.
+ */
+void panthor_gpu_resume(struct panthor_device *ptdev)
+{
+	panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK);
+	panthor_gpu_l2_power_on(ptdev);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h
new file mode 100644
index 000000000000..bba7555dd3c6
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gpu.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANTHOR_GPU_H__
+#define __PANTHOR_GPU_H__
+
+struct panthor_device;
+
+int panthor_gpu_init(struct panthor_device *ptdev);
+void panthor_gpu_unplug(struct panthor_device *ptdev);
+void panthor_gpu_suspend(struct panthor_device *ptdev);
+void panthor_gpu_resume(struct panthor_device *ptdev);
+
+int panthor_gpu_block_power_on(struct panthor_device *ptdev,
+			       const char *blk_name,
+			       u32 pwron_reg, u32 pwrtrans_reg,
+			       u32 rdy_reg, u64 mask, u32 timeout_us);
+int panthor_gpu_block_power_off(struct panthor_device *ptdev,
+				const char *blk_name,
+				u32 pwroff_reg, u32 pwrtrans_reg,
+				u64 mask, u32 timeout_us);
+
+/**
+ * panthor_gpu_power_on() - Power on the GPU block.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \
+	panthor_gpu_block_power_on(ptdev, #type, \
+				  type ## _PWRON_LO, \
+				  type ## _PWRTRANS_LO, \
+				  type ## _READY_LO, \
+				  mask, timeout_us)
+
+/**
+ * panthor_gpu_power_off() - Power off the GPU block.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \
+	panthor_gpu_block_power_off(ptdev, #type, \
+				   type ## _PWROFF_LO, \
+				   type ## _PWRTRANS_LO, \
+				   mask, timeout_us)
+
+int panthor_gpu_l2_power_on(struct panthor_device *ptdev);
+int panthor_gpu_flush_caches(struct panthor_device *ptdev,
+			     u32 l2, u32 lsc, u32 other);
+int panthor_gpu_soft_reset(struct panthor_device *ptdev);
+
+#endif
--
2.42.0


From 3891c2fd2a1ed373dace8ac4968a1bd373f9c041 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:19 +0100
Subject: [PATCH 06/69] [MERGED] drm/panthor: Add GEM logical block

Anything relating to GEM object management is placed here. Nothing
particularly interesting here, given the implementation is based on
drm_gem_shmem_object, which is doing most of the work.

v6:
- Add Maxime's and Heiko's acks
- Return a page-aligned BO size to userspace when creating a BO
- Keep header inclusion alphabetically ordered

v5:
- Add Liviu's and Steve's R-b

v4:
- Force kernel BOs to be GPU mapped
- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers
  to simplify the call sites

v3:
- Add acks for the MIT/GPL2 relicensing
- Provide a panthor_kernel_bo abstraction for buffer objects managed by
  the kernel (will replace panthor_fw_mem and be used everywhere we were
  using panthor_gem_create_and_map() before)
- Adjust things to match drm_gpuvm changes
- Change return of panthor_gem_create_with_handle() to int

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-6-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_gem.c | 230 ++++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_gem.h | 142 ++++++++++++++++
 2 files changed, 372 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c
new file mode 100644
index 000000000000..d6483266d0c2
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gem.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/dma-buf.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_mmu.h"
+
+static void panthor_gem_free_object(struct drm_gem_object *obj)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(obj);
+	struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem;
+
+	drm_gem_free_mmap_offset(&bo->base.base);
+	mutex_destroy(&bo->gpuva_list_lock);
+	drm_gem_shmem_free(&bo->base);
+	drm_gem_object_put(vm_root_gem);
+}
+
+/**
+ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object
+ * @vm: The VM this BO was mapped to.
+ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction
+ * is skipped.
+ */
+void panthor_kernel_bo_destroy(struct panthor_vm *vm,
+			       struct panthor_kernel_bo *bo)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(bo))
+		return;
+
+	panthor_kernel_bo_vunmap(bo);
+
+	if (drm_WARN_ON(bo->obj->dev,
+			to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm)))
+		goto out_free_bo;
+
+	ret = panthor_vm_unmap_range(vm, bo->va_node.start,
+				     panthor_kernel_bo_size(bo));
+	if (ret)
+		goto out_free_bo;
+
+	panthor_vm_free_va(vm, &bo->va_node);
+	drm_gem_object_put(bo->obj);
+
+out_free_bo:
+	kfree(bo);
+}
+
+/**
+ * panthor_kernel_bo_create() - Create and map a GEM object to a VM
+ * @ptdev: Device.
+ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped.
+ * @size: Size of the buffer object.
+ * @bo_flags: Combination of drm_panthor_bo_flags flags.
+ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those
+ * that are related to map operations).
+ * @gpu_va: GPU address assigned when mapping to the VM.
+ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be
+ * automatically allocated.
+ *
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
+ */
+struct panthor_kernel_bo *
+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
+			 size_t size, u32 bo_flags, u32 vm_map_flags,
+			 u64 gpu_va)
+{
+	struct drm_gem_shmem_object *obj;
+	struct panthor_kernel_bo *kbo;
+	struct panthor_gem_object *bo;
+	int ret;
+
+	if (drm_WARN_ON(&ptdev->base, !vm))
+		return ERR_PTR(-EINVAL);
+
+	kbo = kzalloc(sizeof(*kbo), GFP_KERNEL);
+	if (!kbo)
+		return ERR_PTR(-ENOMEM);
+
+	obj = drm_gem_shmem_create(&ptdev->base, size);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto err_free_bo;
+	}
+
+	bo = to_panthor_bo(&obj->base);
+	size = obj->base.size;
+	kbo->obj = &obj->base;
+	bo->flags = bo_flags;
+
+	ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node);
+	if (ret)
+		goto err_put_obj;
+
+	ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags);
+	if (ret)
+		goto err_free_va;
+
+	bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
+	drm_gem_object_get(bo->exclusive_vm_root_gem);
+	bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
+	return kbo;
+
+err_free_va:
+	panthor_vm_free_va(vm, &kbo->va_node);
+
+err_put_obj:
+	drm_gem_object_put(&obj->base);
+
+err_free_bo:
+	kfree(kbo);
+	return ERR_PTR(ret);
+}
+
+static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(obj);
+
+	/* Don't allow mmap on objects that have the NO_MMAP flag set. */
+	if (bo->flags & DRM_PANTHOR_BO_NO_MMAP)
+		return -EINVAL;
+
+	return drm_gem_shmem_object_mmap(obj, vma);
+}
+
+static struct dma_buf *
+panthor_gem_prime_export(struct drm_gem_object *obj, int flags)
+{
+	/* We can't export GEMs that have an exclusive VM. */
+	if (to_panthor_bo(obj)->exclusive_vm_root_gem)
+		return ERR_PTR(-EINVAL);
+
+	return drm_gem_prime_export(obj, flags);
+}
+
+static const struct drm_gem_object_funcs panthor_gem_funcs = {
+	.free = panthor_gem_free_object,
+	.print_info = drm_gem_shmem_object_print_info,
+	.pin = drm_gem_shmem_object_pin,
+	.unpin = drm_gem_shmem_object_unpin,
+	.get_sg_table = drm_gem_shmem_object_get_sg_table,
+	.vmap = drm_gem_shmem_object_vmap,
+	.vunmap = drm_gem_shmem_object_vunmap,
+	.mmap = panthor_gem_mmap,
+	.export = panthor_gem_prime_export,
+	.vm_ops = &drm_gem_shmem_vm_ops,
+};
+
+/**
+ * panthor_gem_create_object - Implementation of driver->gem_create_object.
+ * @ddev: DRM device
+ * @size: Size in bytes of the memory the object will reference
+ *
+ * This lets the GEM helpers allocate object structs for us, and keep
+ * our BO stats correct.
+ */
+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+	struct panthor_gem_object *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+
+	obj->base.base.funcs = &panthor_gem_funcs;
+	obj->base.map_wc = !ptdev->coherent;
+	mutex_init(&obj->gpuva_list_lock);
+	drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock);
+
+	return &obj->base.base;
+}
+
+/**
+ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle.
+ * @file: DRM file.
+ * @ddev: DRM device.
+ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared.
+ * @size: Size of the GEM object to allocate.
+ * @flags: Combination of drm_panthor_bo_flags flags.
+ * @handle: Pointer holding the handle pointing to the new GEM object.
+ *
+ * Return: Zero on success
+ */
+int
+panthor_gem_create_with_handle(struct drm_file *file,
+			       struct drm_device *ddev,
+			       struct panthor_vm *exclusive_vm,
+			       u64 *size, u32 flags, u32 *handle)
+{
+	int ret;
+	struct drm_gem_shmem_object *shmem;
+	struct panthor_gem_object *bo;
+
+	shmem = drm_gem_shmem_create(ddev, *size);
+	if (IS_ERR(shmem))
+		return PTR_ERR(shmem);
+
+	bo = to_panthor_bo(&shmem->base);
+	bo->flags = flags;
+
+	if (exclusive_vm) {
+		bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
+		drm_gem_object_get(bo->exclusive_vm_root_gem);
+		bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
+	}
+
+	/*
+	 * Allocate an id of idr table where the obj is registered
+	 * and handle has the id what user can see.
+	 */
+	ret = drm_gem_handle_create(file, &shmem->base, handle);
+	if (!ret)
+		*size = bo->base.base.size;
+
+	/* drop reference from allocate - handle holds it now. */
+	drm_gem_object_put(&shmem->base);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h
new file mode 100644
index 000000000000..3bccba394d00
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_gem.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_GEM_H__
+#define __PANTHOR_GEM_H__
+
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_mm.h>
+
+#include <linux/iosys-map.h>
+#include <linux/rwsem.h>
+
+struct panthor_vm;
+
+/**
+ * struct panthor_gem_object - Driver specific GEM object.
+ */
+struct panthor_gem_object {
+	/** @base: Inherit from drm_gem_shmem_object. */
+	struct drm_gem_shmem_object base;
+
+	/**
+	 * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object
+	 * is attached to.
+	 *
+	 * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a
+	 * different VM will fail.
+	 *
+	 * All FW memory objects have this field set to the root GEM of the MCU
+	 * VM.
+	 */
+	struct drm_gem_object *exclusive_vm_root_gem;
+
+	/**
+	 * @gpuva_list_lock: Custom GPUVA lock.
+	 *
+	 * Used to protect insertion of drm_gpuva elements to the
+	 * drm_gem_object.gpuva.list list.
+	 *
+	 * We can't use the GEM resv for that, because drm_gpuva_link() is
+	 * called in a dma-signaling path, where we're not allowed to take
+	 * resv locks.
+	 */
+	struct mutex gpuva_list_lock;
+
+	/** @flags: Combination of drm_panthor_bo_flags flags. */
+	u32 flags;
+};
+
+/**
+ * struct panthor_kernel_bo - Kernel buffer object.
+ *
+ * These objects are only manipulated by the kernel driver and not
+ * directly exposed to the userspace. The GPU address of a kernel
+ * BO might be passed to userspace though.
+ */
+struct panthor_kernel_bo {
+	/**
+	 * @obj: The GEM object backing this kernel buffer object.
+	 */
+	struct drm_gem_object *obj;
+
+	/**
+	 * @va_node: VA space allocated to this GEM.
+	 */
+	struct drm_mm_node va_node;
+
+	/**
+	 * @kmap: Kernel CPU mapping of @gem.
+	 */
+	void *kmap;
+};
+
+static inline
+struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj)
+{
+	return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base);
+}
+
+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size);
+
+struct drm_gem_object *
+panthor_gem_prime_import_sg_table(struct drm_device *ddev,
+				  struct dma_buf_attachment *attach,
+				  struct sg_table *sgt);
+
+int
+panthor_gem_create_with_handle(struct drm_file *file,
+			       struct drm_device *ddev,
+			       struct panthor_vm *exclusive_vm,
+			       u64 *size, u32 flags, uint32_t *handle);
+
+static inline u64
+panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo)
+{
+	return bo->va_node.start;
+}
+
+static inline size_t
+panthor_kernel_bo_size(struct panthor_kernel_bo *bo)
+{
+	return bo->obj->size;
+}
+
+static inline int
+panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo)
+{
+	struct iosys_map map;
+	int ret;
+
+	if (bo->kmap)
+		return 0;
+
+	ret = drm_gem_vmap_unlocked(bo->obj, &map);
+	if (ret)
+		return ret;
+
+	bo->kmap = map.vaddr;
+	return 0;
+}
+
+static inline void
+panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo)
+{
+	if (bo->kmap) {
+		struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap);
+
+		drm_gem_vunmap_unlocked(bo->obj, &map);
+		bo->kmap = NULL;
+	}
+}
+
+struct panthor_kernel_bo *
+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
+			 size_t size, u32 bo_flags, u32 vm_map_flags,
+			 u64 gpu_va);
+
+void panthor_kernel_bo_destroy(struct panthor_vm *vm,
+			       struct panthor_kernel_bo *bo);
+
+#endif /* __PANTHOR_GEM_H__ */
--
2.42.0


From bd8892e952681399c034a95915778399387104d2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:20 +0100
Subject: [PATCH 07/69] [MERGED] drm/panthor: Add the devfreq logical block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every thing related to devfreq in placed in panthor_devfreq.c, and
helpers that can be called by other logical blocks are exposed through
panthor_devfreq.h.

This implementation is loosely based on the panfrost implementation,
the only difference being that we don't count device users, because
the idle/active state will be managed by the scheduler logic.

v6:
- Add Maxime's and Heiko's acks
- Keep header inclusion alphabetically ordered

v4:
- Add Clément's A-b for the relicensing

v3:
- Add acks for the MIT/GPL2 relicensing

v2:
- Added in v2

Cc: Clément Péron <peron.clem@gmail.com> # MIT+GPL2 relicensing
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Acked-by: Clément Péron <peron.clem@gmail.com> # MIT+GPL2 relicensing
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-7-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_devfreq.h |  21 ++
 2 files changed, 304 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c
new file mode 100644
index 000000000000..7ac4fa290f27
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/clk.h>
+#include <linux/devfreq.h>
+#include <linux/devfreq_cooling.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+
+#include <drm/drm_managed.h>
+
+#include "panthor_devfreq.h"
+#include "panthor_device.h"
+
+/**
+ * struct panthor_devfreq - Device frequency management
+ */
+struct panthor_devfreq {
+	/** @devfreq: devfreq device. */
+	struct devfreq *devfreq;
+
+	/** @gov_data: Governor data. */
+	struct devfreq_simple_ondemand_data gov_data;
+
+	/** @busy_time: Busy time. */
+	ktime_t busy_time;
+
+	/** @idle_time: Idle time. */
+	ktime_t idle_time;
+
+	/** @time_last_update: Last update time. */
+	ktime_t time_last_update;
+
+	/** @last_busy_state: True if the GPU was busy last time we updated the state. */
+	bool last_busy_state;
+
+	/*
+	 * @lock: Lock used to protect busy_time, idle_time, time_last_update and
+	 * last_busy_state.
+	 *
+	 * These fields can be accessed concurrently by panthor_devfreq_get_dev_status()
+	 * and panthor_devfreq_record_{busy,idle}().
+	 */
+	spinlock_t lock;
+};
+
+static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq)
+{
+	ktime_t now, last;
+
+	now = ktime_get();
+	last = pdevfreq->time_last_update;
+
+	if (pdevfreq->last_busy_state)
+		pdevfreq->busy_time += ktime_sub(now, last);
+	else
+		pdevfreq->idle_time += ktime_sub(now, last);
+
+	pdevfreq->time_last_update = now;
+}
+
+static int panthor_devfreq_target(struct device *dev, unsigned long *freq,
+				  u32 flags)
+{
+	struct dev_pm_opp *opp;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+	dev_pm_opp_put(opp);
+
+	return dev_pm_opp_set_rate(dev, *freq);
+}
+
+static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq)
+{
+	pdevfreq->busy_time = 0;
+	pdevfreq->idle_time = 0;
+	pdevfreq->time_last_update = ktime_get();
+}
+
+static int panthor_devfreq_get_dev_status(struct device *dev,
+					  struct devfreq_dev_status *status)
+{
+	struct panthor_device *ptdev = dev_get_drvdata(dev);
+	struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+	unsigned long irqflags;
+
+	status->current_frequency = clk_get_rate(ptdev->clks.core);
+
+	spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+	panthor_devfreq_update_utilization(pdevfreq);
+
+	status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time,
+						   pdevfreq->idle_time));
+
+	status->busy_time = ktime_to_ns(pdevfreq->busy_time);
+
+	panthor_devfreq_reset(pdevfreq);
+
+	spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+
+	drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n",
+		status->busy_time, status->total_time,
+		status->busy_time / (status->total_time / 100),
+		status->current_frequency / 1000 / 1000);
+
+	return 0;
+}
+
+static struct devfreq_dev_profile panthor_devfreq_profile = {
+	.timer = DEVFREQ_TIMER_DELAYED,
+	.polling_ms = 50, /* ~3 frames */
+	.target = panthor_devfreq_target,
+	.get_dev_status = panthor_devfreq_get_dev_status,
+};
+
+int panthor_devfreq_init(struct panthor_device *ptdev)
+{
+	/* There's actually 2 regulators (mali and sram), but the OPP core only
+	 * supports one.
+	 *
+	 * We assume the sram regulator is coupled with the mali one and let
+	 * the coupling logic deal with voltage updates.
+	 */
+	static const char * const reg_names[] = { "mali", NULL };
+	struct thermal_cooling_device *cooling;
+	struct device *dev = ptdev->base.dev;
+	struct panthor_devfreq *pdevfreq;
+	struct dev_pm_opp *opp;
+	unsigned long cur_freq;
+	int ret;
+
+	pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL);
+	if (!pdevfreq)
+		return -ENOMEM;
+
+	ptdev->devfreq = pdevfreq;
+
+	ret = devm_pm_opp_set_regulators(dev, reg_names);
+	if (ret) {
+		if (ret != -EPROBE_DEFER)
+			DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n");
+
+		return ret;
+	}
+
+	ret = devm_pm_opp_of_add_table(dev);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&pdevfreq->lock);
+
+	panthor_devfreq_reset(pdevfreq);
+
+	cur_freq = clk_get_rate(ptdev->clks.core);
+
+	opp = devfreq_recommended_opp(dev, &cur_freq, 0);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	panthor_devfreq_profile.initial_freq = cur_freq;
+
+	/* Regulator coupling only takes care of synchronizing/balancing voltage
+	 * updates, but the coupled regulator needs to be enabled manually.
+	 *
+	 * We use devm_regulator_get_enable_optional() and keep the sram supply
+	 * enabled until the device is removed, just like we do for the mali
+	 * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called,
+	 * and disabled when the opp_table is torn down, using the devm action.
+	 *
+	 * If we really care about disabling regulators on suspend, we should:
+	 * - use devm_regulator_get_optional() here
+	 * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function
+	 *   (this disables the regulator passed to the OPP layer)
+	 * - call dev_pm_opp_set_opp(dev, NULL) and
+	 *   regulator_disable(ptdev->regulators.sram) in
+	 *   panthor_devfreq_suspend()
+	 * - call dev_pm_opp_set_opp(dev, default_opp) and
+	 *   regulator_enable(ptdev->regulators.sram) in
+	 *   panthor_devfreq_resume()
+	 *
+	 * But without knowing if it's beneficial or not (in term of power
+	 * consumption), or how much it slows down the suspend/resume steps,
+	 * let's just keep regulators enabled for the device lifetime.
+	 */
+	ret = devm_regulator_get_enable_optional(dev, "sram");
+	if (ret && ret != -ENODEV) {
+		if (ret != -EPROBE_DEFER)
+			DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n");
+		return ret;
+	}
+
+	/*
+	 * Set the recommend OPP this will enable and configure the regulator
+	 * if any and will avoid a switch off by regulator_late_cleanup()
+	 */
+	ret = dev_pm_opp_set_opp(dev, opp);
+	if (ret) {
+		DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n");
+		return ret;
+	}
+
+	dev_pm_opp_put(opp);
+
+	/*
+	 * Setup default thresholds for the simple_ondemand governor.
+	 * The values are chosen based on experiments.
+	 */
+	pdevfreq->gov_data.upthreshold = 45;
+	pdevfreq->gov_data.downdifferential = 5;
+
+	pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile,
+						    DEVFREQ_GOV_SIMPLE_ONDEMAND,
+						    &pdevfreq->gov_data);
+	if (IS_ERR(pdevfreq->devfreq)) {
+		DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n");
+		ret = PTR_ERR(pdevfreq->devfreq);
+		pdevfreq->devfreq = NULL;
+		return ret;
+	}
+
+	cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL);
+	if (IS_ERR(cooling))
+		DRM_DEV_INFO(dev, "Failed to register cooling device\n");
+
+	return 0;
+}
+
+int panthor_devfreq_resume(struct panthor_device *ptdev)
+{
+	struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+
+	if (!pdevfreq->devfreq)
+		return 0;
+
+	panthor_devfreq_reset(pdevfreq);
+
+	return devfreq_resume_device(pdevfreq->devfreq);
+}
+
+int panthor_devfreq_suspend(struct panthor_device *ptdev)
+{
+	struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+
+	if (!pdevfreq->devfreq)
+		return 0;
+
+	return devfreq_suspend_device(pdevfreq->devfreq);
+}
+
+void panthor_devfreq_record_busy(struct panthor_device *ptdev)
+{
+	struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+	unsigned long irqflags;
+
+	if (!pdevfreq->devfreq)
+		return;
+
+	spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+	panthor_devfreq_update_utilization(pdevfreq);
+	pdevfreq->last_busy_state = true;
+
+	spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+}
+
+void panthor_devfreq_record_idle(struct panthor_device *ptdev)
+{
+	struct panthor_devfreq *pdevfreq = ptdev->devfreq;
+	unsigned long irqflags;
+
+	if (!pdevfreq->devfreq)
+		return;
+
+	spin_lock_irqsave(&pdevfreq->lock, irqflags);
+
+	panthor_devfreq_update_utilization(pdevfreq);
+	pdevfreq->last_busy_state = false;
+
+	spin_unlock_irqrestore(&pdevfreq->lock, irqflags);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h
new file mode 100644
index 000000000000..83a5c9522493
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_devfreq.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2019 Collabora ltd. */
+
+#ifndef __PANTHOR_DEVFREQ_H__
+#define __PANTHOR_DEVFREQ_H__
+
+struct devfreq;
+struct thermal_cooling_device;
+
+struct panthor_device;
+struct panthor_devfreq;
+
+int panthor_devfreq_init(struct panthor_device *ptdev);
+
+int panthor_devfreq_resume(struct panthor_device *ptdev);
+int panthor_devfreq_suspend(struct panthor_device *ptdev);
+
+void panthor_devfreq_record_busy(struct panthor_device *ptdev);
+void panthor_devfreq_record_idle(struct panthor_device *ptdev);
+
+#endif /* __PANTHOR_DEVFREQ_H__ */
--
2.42.0


From 7ad168e5e8513c3e3ef083b6837540db562ac935 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:21 +0100
Subject: [PATCH 08/69] [MERGED] drm/panthor: Add the MMU/VM logical block

MMU and VM management is related and placed in the same source file.

Page table updates are delegated to the io-pgtable-arm driver that's in
the iommu subsystem.

The VM management logic is based on drm_gpuva_mgr, and is assuming the
VA space is mostly managed by the usermode driver, except for a reserved
portion of this VA-space that's used for kernel objects (like the heap
contexts/chunks).

Both asynchronous and synchronous VM operations are supported, and
internal helpers are exposed to allow other logical blocks to map their
buffers in the GPU VA space.

There's one VM_BIND queue per-VM (meaning the Vulkan driver can only
expose one sparse-binding queue), and this bind queue is managed with
a 1:1 drm_sched_entity:drm_gpu_scheduler, such that each VM gets its own
independent execution queue, avoiding VM operation serialization at the
device level (things are still serialized at the VM level).

The rest is just implementation details that are hopefully well explained
in the documentation.

v6:
- Add Maxime's and Heiko's acks
- Add Steve's R-b
- Adjust the TRANSCFG value to account for SW VA space limitation on
  32-bit systems
- Keep header inclusion alphabetically ordered

v5:
- Fix a double panthor_vm_cleanup_op_ctx() call
- Fix a race between panthor_vm_prepare_map_op_ctx() and
  panthor_vm_bo_put()
- Fix panthor_vm_pool_destroy_vm() kernel doc
- Fix paddr adjustment in panthor_vm_map_pages()
- Fix bo_offset calculation in panthor_vm_get_bo_for_va()

v4:
- Add an helper to return the VM state
- Check drmm_mutex_init() return code
- Remove the VM from the AS reclaim list when panthor_vm_active() is
  called
- Count the number of active VM users instead of considering there's
  at most one user (several scheduling groups can point to the same
  vM)
- Pre-allocate a VMA object for unmap operations (unmaps can trigger
  a sm_step_remap() call)
- Check vm->root_page_table instead of vm->pgtbl_ops to detect if
  the io-pgtable is trying to allocate the root page table
- Don't memset() the va_node in panthor_vm_alloc_va(), make it a
  caller requirement
- Fix the kernel doc in a few places
- Drop the panthor_vm::base offset constraint and modify
  panthor_vm_put() to explicitly check for a NULL value
- Fix unbalanced vm_bo refcount in panthor_gpuva_sm_step_remap()
- Drop stale comments about the shared_bos list
- Patch mmu_features::va_bits on 32-bit builds to reflect the
  io_pgtable limitation and let the UMD know about it

v3:
- Add acks for the MIT/GPL2 relicensing
- Propagate MMU faults to the scheduler
- Move pages pinning/unpinning out of the dma_signalling path
- Fix 32-bit support
- Rework the user/kernel VA range calculation
- Make the auto-VA range explicit (auto-VA range doesn't cover the full
  kernel-VA range on the MCU VM)
- Let callers of panthor_vm_alloc_va() allocate the drm_mm_node
  (embedded in panthor_kernel_bo now)
- Adjust things to match the latest drm_gpuvm changes (extobj tracking,
  resv prep and more)
- Drop the per-AS lock and use slots_lock (fixes a race on vm->as.id)
- Set as.id to -1 when reusing an address space from the LRU list
- Drop misleading comment about page faults
- Remove check for irq being assigned in panthor_mmu_unplug()

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-8-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_mmu.c | 2768 +++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_mmu.h |  102 +
 2 files changed, 2870 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c
new file mode 100644
index 000000000000..fdd35249169f
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_mmu.c
@@ -0,0 +1,2768 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gpuvm.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+#include <linux/atomic.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-pgtable.h>
+#include <linux/iommu.h>
+#include <linux/kmemleak.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/shmem_fs.h>
+#include <linux/sizes.h>
+
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_heap.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+#include "panthor_sched.h"
+
+#define MAX_AS_SLOTS			32
+
+struct panthor_vm;
+
+/**
+ * struct panthor_as_slot - Address space slot
+ */
+struct panthor_as_slot {
+	/** @vm: VM bound to this slot. NULL is no VM is bound. */
+	struct panthor_vm *vm;
+};
+
+/**
+ * struct panthor_mmu - MMU related data
+ */
+struct panthor_mmu {
+	/** @irq: The MMU irq. */
+	struct panthor_irq irq;
+
+	/** @as: Address space related fields.
+	 *
+	 * The GPU has a limited number of address spaces (AS) slots, forcing
+	 * us to re-assign them to re-assign slots on-demand.
+	 */
+	struct {
+		/** @slots_lock: Lock protecting access to all other AS fields. */
+		struct mutex slots_lock;
+
+		/** @alloc_mask: Bitmask encoding the allocated slots. */
+		unsigned long alloc_mask;
+
+		/** @faulty_mask: Bitmask encoding the faulty slots. */
+		unsigned long faulty_mask;
+
+		/** @slots: VMs currently bound to the AS slots. */
+		struct panthor_as_slot slots[MAX_AS_SLOTS];
+
+		/**
+		 * @lru_list: List of least recently used VMs.
+		 *
+		 * We use this list to pick a VM to evict when all slots are
+		 * used.
+		 *
+		 * There should be no more active VMs than there are AS slots,
+		 * so this LRU is just here to keep VMs bound until there's
+		 * a need to release a slot, thus avoid unnecessary TLB/cache
+		 * flushes.
+		 */
+		struct list_head lru_list;
+	} as;
+
+	/** @vm: VMs management fields */
+	struct {
+		/** @lock: Lock protecting access to list. */
+		struct mutex lock;
+
+		/** @list: List containing all VMs. */
+		struct list_head list;
+
+		/** @reset_in_progress: True if a reset is in progress. */
+		bool reset_in_progress;
+
+		/** @wq: Workqueue used for the VM_BIND queues. */
+		struct workqueue_struct *wq;
+	} vm;
+};
+
+/**
+ * struct panthor_vm_pool - VM pool object
+ */
+struct panthor_vm_pool {
+	/** @xa: Array used for VM handle tracking. */
+	struct xarray xa;
+};
+
+/**
+ * struct panthor_vma - GPU mapping object
+ *
+ * This is used to track GEM mappings in GPU space.
+ */
+struct panthor_vma {
+	/** @base: Inherits from drm_gpuva. */
+	struct drm_gpuva base;
+
+	/** @node: Used to implement deferred release of VMAs. */
+	struct list_head node;
+
+	/**
+	 * @flags: Combination of drm_panthor_vm_bind_op_flags.
+	 *
+	 * Only map related flags are accepted.
+	 */
+	u32 flags;
+};
+
+/**
+ * struct panthor_vm_op_ctx - VM operation context
+ *
+ * With VM operations potentially taking place in a dma-signaling path, we
+ * need to make sure everything that might require resource allocation is
+ * pre-allocated upfront. This is what this operation context is far.
+ *
+ * We also collect resources that have been freed, so we can release them
+ * asynchronously, and let the VM_BIND scheduler process the next VM_BIND
+ * request.
+ */
+struct panthor_vm_op_ctx {
+	/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
+	struct {
+		/** @count: Number of pages reserved. */
+		u32 count;
+
+		/** @ptr: Point to the first unused page in the @pages table. */
+		u32 ptr;
+
+		/**
+		 * @page: Array of pages that can be used for an MMU page table update.
+		 *
+		 * After an VM operation, there might be free pages left in this array.
+		 * They should be returned to the pt_cache as part of the op_ctx cleanup.
+		 */
+		void **pages;
+	} rsvd_page_tables;
+
+	/**
+	 * @preallocated_vmas: Pre-allocated VMAs to handle the remap case.
+	 *
+	 * Partial unmap requests or map requests overlapping existing mappings will
+	 * trigger a remap call, which need to register up to three panthor_vma objects
+	 * (one for the new mapping, and two for the previous and next mappings).
+	 */
+	struct panthor_vma *preallocated_vmas[3];
+
+	/** @flags: Combination of drm_panthor_vm_bind_op_flags. */
+	u32 flags;
+
+	/** @va: Virtual range targeted by the VM operation. */
+	struct {
+		/** @addr: Start address. */
+		u64 addr;
+
+		/** @range: Range size. */
+		u64 range;
+	} va;
+
+	/**
+	 * @returned_vmas: List of panthor_vma objects returned after a VM operation.
+	 *
+	 * For unmap operations, this will contain all VMAs that were covered by the
+	 * specified VA range.
+	 *
+	 * For map operations, this will contain all VMAs that previously mapped to
+	 * the specified VA range.
+	 *
+	 * Those VMAs, and the resources they point to will be released as part of
+	 * the op_ctx cleanup operation.
+	 */
+	struct list_head returned_vmas;
+
+	/** @map: Fields specific to a map operation. */
+	struct {
+		/** @vm_bo: Buffer object to map. */
+		struct drm_gpuvm_bo *vm_bo;
+
+		/** @bo_offset: Offset in the buffer object. */
+		u64 bo_offset;
+
+		/**
+		 * @sgt: sg-table pointing to pages backing the GEM object.
+		 *
+		 * This is gathered at job creation time, such that we don't have
+		 * to allocate in ::run_job().
+		 */
+		struct sg_table *sgt;
+
+		/**
+		 * @new_vma: The new VMA object that will be inserted to the VA tree.
+		 */
+		struct panthor_vma *new_vma;
+	} map;
+};
+
+/**
+ * struct panthor_vm - VM object
+ *
+ * A VM is an object representing a GPU (or MCU) virtual address space.
+ * It embeds the MMU page table for this address space, a tree containing
+ * all the virtual mappings of GEM objects, and other things needed to manage
+ * the VM.
+ *
+ * Except for the MCU VM, which is managed by the kernel, all other VMs are
+ * created by userspace and mostly managed by userspace, using the
+ * %DRM_IOCTL_PANTHOR_VM_BIND ioctl.
+ *
+ * A portion of the virtual address space is reserved for kernel objects,
+ * like heap chunks, and userspace gets to decide how much of the virtual
+ * address space is left to the kernel (half of the virtual address space
+ * by default).
+ */
+struct panthor_vm {
+	/**
+	 * @base: Inherit from drm_gpuvm.
+	 *
+	 * We delegate all the VA management to the common drm_gpuvm framework
+	 * and only implement hooks to update the MMU page table.
+	 */
+	struct drm_gpuvm base;
+
+	/**
+	 * @sched: Scheduler used for asynchronous VM_BIND request.
+	 *
+	 * We use a 1:1 scheduler here.
+	 */
+	struct drm_gpu_scheduler sched;
+
+	/**
+	 * @entity: Scheduling entity representing the VM_BIND queue.
+	 *
+	 * There's currently one bind queue per VM. It doesn't make sense to
+	 * allow more given the VM operations are serialized anyway.
+	 */
+	struct drm_sched_entity entity;
+
+	/** @ptdev: Device. */
+	struct panthor_device *ptdev;
+
+	/** @memattr: Value to program to the AS_MEMATTR register. */
+	u64 memattr;
+
+	/** @pgtbl_ops: Page table operations. */
+	struct io_pgtable_ops *pgtbl_ops;
+
+	/** @root_page_table: Stores the root page table pointer. */
+	void *root_page_table;
+
+	/**
+	 * @op_lock: Lock used to serialize operations on a VM.
+	 *
+	 * The serialization of jobs queued to the VM_BIND queue is already
+	 * taken care of by drm_sched, but we need to serialize synchronous
+	 * and asynchronous VM_BIND request. This is what this lock is for.
+	 */
+	struct mutex op_lock;
+
+	/**
+	 * @op_ctx: The context attached to the currently executing VM operation.
+	 *
+	 * NULL when no operation is in progress.
+	 */
+	struct panthor_vm_op_ctx *op_ctx;
+
+	/**
+	 * @mm: Memory management object representing the auto-VA/kernel-VA.
+	 *
+	 * Used to auto-allocate VA space for kernel-managed objects (tiler
+	 * heaps, ...).
+	 *
+	 * For the MCU VM, this is managing the VA range that's used to map
+	 * all shared interfaces.
+	 *
+	 * For user VMs, the range is specified by userspace, and must not
+	 * exceed half of the VA space addressable.
+	 */
+	struct drm_mm mm;
+
+	/** @mm_lock: Lock protecting the @mm field. */
+	struct mutex mm_lock;
+
+	/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
+	struct {
+		/** @start: Start of the automatic VA-range for kernel BOs. */
+		u64 start;
+
+		/** @size: Size of the automatic VA-range for kernel BOs. */
+		u64 end;
+	} kernel_auto_va;
+
+	/** @as: Address space related fields. */
+	struct {
+		/**
+		 * @id: ID of the address space this VM is bound to.
+		 *
+		 * A value of -1 means the VM is inactive/not bound.
+		 */
+		int id;
+
+		/** @active_cnt: Number of active users of this VM. */
+		refcount_t active_cnt;
+
+		/**
+		 * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list.
+		 *
+		 * Active VMs should not be inserted in the LRU list.
+		 */
+		struct list_head lru_node;
+	} as;
+
+	/**
+	 * @heaps: Tiler heap related fields.
+	 */
+	struct {
+		/**
+		 * @pool: The heap pool attached to this VM.
+		 *
+		 * Will stay NULL until someone creates a heap context on this VM.
+		 */
+		struct panthor_heap_pool *pool;
+
+		/** @lock: Lock used to protect access to @pool. */
+		struct mutex lock;
+	} heaps;
+
+	/** @node: Used to insert the VM in the panthor_mmu::vm::list. */
+	struct list_head node;
+
+	/** @for_mcu: True if this is the MCU VM. */
+	bool for_mcu;
+
+	/**
+	 * @destroyed: True if the VM was destroyed.
+	 *
+	 * No further bind requests should be queued to a destroyed VM.
+	 */
+	bool destroyed;
+
+	/**
+	 * @unusable: True if the VM has turned unusable because something
+	 * bad happened during an asynchronous request.
+	 *
+	 * We don't try to recover from such failures, because this implies
+	 * informing userspace about the specific operation that failed, and
+	 * hoping the userspace driver can replay things from there. This all
+	 * sounds very complicated for little gain.
+	 *
+	 * Instead, we should just flag the VM as unusable, and fail any
+	 * further request targeting this VM.
+	 *
+	 * We also provide a way to query a VM state, so userspace can destroy
+	 * it and create a new one.
+	 *
+	 * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST
+	 * situation, where the logical device needs to be re-created.
+	 */
+	bool unusable;
+
+	/**
+	 * @unhandled_fault: Unhandled fault happened.
+	 *
+	 * This should be reported to the scheduler, and the queue/group be
+	 * flagged as faulty as a result.
+	 */
+	bool unhandled_fault;
+};
+
+/**
+ * struct panthor_vm_bind_job - VM bind job
+ */
+struct panthor_vm_bind_job {
+	/** @base: Inherit from drm_sched_job. */
+	struct drm_sched_job base;
+
+	/** @refcount: Reference count. */
+	struct kref refcount;
+
+	/** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */
+	struct work_struct cleanup_op_ctx_work;
+
+	/** @vm: VM targeted by the VM operation. */
+	struct panthor_vm *vm;
+
+	/** @ctx: Operation context. */
+	struct panthor_vm_op_ctx ctx;
+};
+
+/**
+ * @pt_cache: Cache used to allocate MMU page tables.
+ *
+ * The pre-allocation pattern forces us to over-allocate to plan for
+ * the worst case scenario, and return the pages we didn't use.
+ *
+ * Having a kmem_cache allows us to speed allocations.
+ */
+static struct kmem_cache *pt_cache;
+
+/**
+ * alloc_pt() - Custom page table allocator
+ * @cookie: Cookie passed at page table allocation time.
+ * @size: Size of the page table. This size should be fixed,
+ * and determined at creation time based on the granule size.
+ * @gfp: GFP flags.
+ *
+ * We want a custom allocator so we can use a cache for page table
+ * allocations and amortize the cost of the over-reservation that's
+ * done to allow asynchronous VM operations.
+ *
+ * Return: non-NULL on success, NULL if the allocation failed for any
+ * reason.
+ */
+static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
+{
+	struct panthor_vm *vm = cookie;
+	void *page;
+
+	/* Allocation of the root page table happening during init. */
+	if (unlikely(!vm->root_page_table)) {
+		struct page *p;
+
+		drm_WARN_ON(&vm->ptdev->base, vm->op_ctx);
+		p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev),
+				     gfp | __GFP_ZERO, get_order(size));
+		page = p ? page_address(p) : NULL;
+		vm->root_page_table = page;
+		return page;
+	}
+
+	/* We're not supposed to have anything bigger than 4k here, because we picked a
+	 * 4k granule size at init time.
+	 */
+	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
+		return NULL;
+
+	/* We must have some op_ctx attached to the VM and it must have at least one
+	 * free page.
+	 */
+	if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) ||
+	    drm_WARN_ON(&vm->ptdev->base,
+			vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count))
+		return NULL;
+
+	page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++];
+	memset(page, 0, SZ_4K);
+
+	/* Page table entries don't use virtual addresses, which trips out
+	 * kmemleak. kmemleak_alloc_phys() might work, but physical addresses
+	 * are mixed with other fields, and I fear kmemleak won't detect that
+	 * either.
+	 *
+	 * Let's just ignore memory passed to the page-table driver for now.
+	 */
+	kmemleak_ignore(page);
+	return page;
+}
+
+/**
+ * @free_pt() - Custom page table free function
+ * @cookie: Cookie passed at page table allocation time.
+ * @data: Page table to free.
+ * @size: Size of the page table. This size should be fixed,
+ * and determined at creation time based on the granule size.
+ */
+static void free_pt(void *cookie, void *data, size_t size)
+{
+	struct panthor_vm *vm = cookie;
+
+	if (unlikely(vm->root_page_table == data)) {
+		free_pages((unsigned long)data, get_order(size));
+		vm->root_page_table = NULL;
+		return;
+	}
+
+	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
+		return;
+
+	/* Return the page to the pt_cache. */
+	kmem_cache_free(pt_cache, data);
+}
+
+static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
+{
+	int ret;
+	u32 val;
+
+	/* Wait for the MMU status to indicate there is no active command, in
+	 * case one is pending.
+	 */
+	ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr),
+						val, !(val & AS_STATUS_AS_ACTIVE),
+						10, 100000);
+
+	if (ret) {
+		panthor_device_schedule_reset(ptdev);
+		drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n");
+	}
+
+	return ret;
+}
+
+static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd)
+{
+	int status;
+
+	/* write AS_COMMAND when MMU is ready to accept another command */
+	status = wait_ready(ptdev, as_nr);
+	if (!status)
+		gpu_write(ptdev, AS_COMMAND(as_nr), cmd);
+
+	return status;
+}
+
+static void lock_region(struct panthor_device *ptdev, u32 as_nr,
+			u64 region_start, u64 size)
+{
+	u8 region_width;
+	u64 region;
+	u64 region_end = region_start + size;
+
+	if (!size)
+		return;
+
+	/*
+	 * The locked region is a naturally aligned power of 2 block encoded as
+	 * log2 minus(1).
+	 * Calculate the desired start/end and look for the highest bit which
+	 * differs. The smallest naturally aligned block must include this bit
+	 * change, the desired region starts with this bit (and subsequent bits)
+	 * zeroed and ends with the bit (and subsequent bits) set to one.
+	 */
+	region_width = max(fls64(region_start ^ (region_end - 1)),
+			   const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1;
+
+	/*
+	 * Mask off the low bits of region_start (which would be ignored by
+	 * the hardware anyway)
+	 */
+	region_start &= GENMASK_ULL(63, region_width);
+
+	region = region_width | region_start;
+
+	/* Lock the region that needs to be updated */
+	gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region));
+	gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region));
+	write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
+}
+
+static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
+				      u64 iova, u64 size, u32 op)
+{
+	lockdep_assert_held(&ptdev->mmu->as.slots_lock);
+
+	if (as_nr < 0)
+		return 0;
+
+	if (op != AS_COMMAND_UNLOCK)
+		lock_region(ptdev, as_nr, iova, size);
+
+	/* Run the MMU operation */
+	write_cmd(ptdev, as_nr, op);
+
+	/* Wait for the flush to complete */
+	return wait_ready(ptdev, as_nr);
+}
+
+static int mmu_hw_do_operation(struct panthor_vm *vm,
+			       u64 iova, u64 size, u32 op)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	int ret;
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op);
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	return ret;
+}
+
+static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
+				 u64 transtab, u64 transcfg, u64 memattr)
+{
+	int ret;
+
+	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+	if (ret)
+		return ret;
+
+	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab));
+	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab));
+
+	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr));
+	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr));
+
+	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg));
+	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg));
+
+	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
+{
+	int ret;
+
+	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
+	if (ret)
+		return ret;
+
+	gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0);
+	gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0);
+
+	gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0);
+	gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0);
+
+	gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);
+	gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0);
+
+	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
+}
+
+static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value)
+{
+	/* Bits 16 to 31 mean REQ_COMPLETE. */
+	return value & GENMASK(15, 0);
+}
+
+static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as)
+{
+	return BIT(as);
+}
+
+/**
+ * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults
+ * @vm: VM to check.
+ *
+ * Return: true if the VM has unhandled faults, false otherwise.
+ */
+bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm)
+{
+	return vm->unhandled_fault;
+}
+
+/**
+ * panthor_vm_is_unusable() - Check if the VM is still usable
+ * @vm: VM to check.
+ *
+ * Return: true if the VM is unusable, false otherwise.
+ */
+bool panthor_vm_is_unusable(struct panthor_vm *vm)
+{
+	return vm->unusable;
+}
+
+static void panthor_vm_release_as_locked(struct panthor_vm *vm)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+
+	lockdep_assert_held(&ptdev->mmu->as.slots_lock);
+
+	if (drm_WARN_ON(&ptdev->base, vm->as.id < 0))
+		return;
+
+	ptdev->mmu->as.slots[vm->as.id].vm = NULL;
+	clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
+	refcount_set(&vm->as.active_cnt, 0);
+	list_del_init(&vm->as.lru_node);
+	vm->as.id = -1;
+}
+
+/**
+ * panthor_vm_active() - Flag a VM as active
+ * @VM: VM to flag as active.
+ *
+ * Assigns an address space to a VM so it can be used by the GPU/MCU.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_active(struct panthor_vm *vm)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
+	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
+	int ret = 0, as, cookie;
+	u64 transtab, transcfg;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return -ENODEV;
+
+	if (refcount_inc_not_zero(&vm->as.active_cnt))
+		goto out_dev_exit;
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+
+	if (refcount_inc_not_zero(&vm->as.active_cnt))
+		goto out_unlock;
+
+	as = vm->as.id;
+	if (as >= 0) {
+		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
+		 * re-enable the MMU after clearing+unmasking the AS interrupts.
+		 */
+		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
+			goto out_enable_as;
+
+		goto out_make_active;
+	}
+
+	/* Check for a free AS */
+	if (vm->for_mcu) {
+		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
+		as = 0;
+	} else {
+		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
+	}
+
+	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
+		struct panthor_vm *lru_vm;
+
+		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
+						  struct panthor_vm,
+						  as.lru_node);
+		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+
+		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
+		as = lru_vm->as.id;
+		panthor_vm_release_as_locked(lru_vm);
+	}
+
+	/* Assign the free or reclaimed AS to the FD */
+	vm->as.id = as;
+	set_bit(as, &ptdev->mmu->as.alloc_mask);
+	ptdev->mmu->as.slots[as].vm = vm;
+
+out_enable_as:
+	transtab = cfg->arm_lpae_s1_cfg.ttbr;
+	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
+		   AS_TRANSCFG_PTW_RA |
+		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
+		   AS_TRANSCFG_INA_BITS(55 - va_bits);
+	if (ptdev->coherent)
+		transcfg |= AS_TRANSCFG_PTW_SH_OS;
+
+	/* If the VM is re-activated, we clear the fault. */
+	vm->unhandled_fault = false;
+
+	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
+	 * before enabling the AS.
+	 */
+	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
+		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
+		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
+		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
+	}
+
+	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);
+
+out_make_active:
+	if (!ret) {
+		refcount_set(&vm->as.active_cnt, 1);
+		list_del_init(&vm->as.lru_node);
+	}
+
+out_unlock:
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+/**
+ * panthor_vm_idle() - Flag a VM idle
+ * @VM: VM to flag as idle.
+ *
+ * When we know the GPU is done with the VM (no more jobs to process),
+ * we can relinquish the AS slot attached to this VM, if any.
+ *
+ * We don't release the slot immediately, but instead place the VM in
+ * the LRU list, so it can be evicted if another VM needs an AS slot.
+ * This way, VMs keep attached to the AS they were given until we run
+ * out of free slot, limiting the number of MMU operations (TLB flush
+ * and other AS updates).
+ */
+void panthor_vm_idle(struct panthor_vm *vm)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+
+	if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock))
+		return;
+
+	if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node)))
+		list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list);
+
+	refcount_set(&vm->as.active_cnt, 0);
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+}
+
+static void panthor_vm_stop(struct panthor_vm *vm)
+{
+	drm_sched_stop(&vm->sched, NULL);
+}
+
+static void panthor_vm_start(struct panthor_vm *vm)
+{
+	drm_sched_start(&vm->sched, true);
+}
+
+/**
+ * panthor_vm_as() - Get the AS slot attached to a VM
+ * @vm: VM to get the AS slot of.
+ *
+ * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise.
+ */
+int panthor_vm_as(struct panthor_vm *vm)
+{
+	return vm->as.id;
+}
+
+static size_t get_pgsize(u64 addr, size_t size, size_t *count)
+{
+	/*
+	 * io-pgtable only operates on multiple pages within a single table
+	 * entry, so we need to split at boundaries of the table size, i.e.
+	 * the next block size up. The distance from address A to the next
+	 * boundary of block size B is logically B - A % B, but in unsigned
+	 * two's complement where B is a power of two we get the equivalence
+	 * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
+	 */
+	size_t blk_offset = -addr % SZ_2M;
+
+	if (blk_offset || size < SZ_2M) {
+		*count = min_not_zero(blk_offset, size) / SZ_4K;
+		return SZ_4K;
+	}
+	blk_offset = -addr % SZ_1G ?: SZ_1G;
+	*count = min(blk_offset, size) / SZ_2M;
+	return SZ_2M;
+}
+
+static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	int ret = 0, cookie;
+
+	if (vm->as.id < 0)
+		return 0;
+
+	/* If the device is unplugged, we just silently skip the flush. */
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return 0;
+
+	/* Flush the PTs only if we're already awake */
+	if (pm_runtime_active(ptdev->base.dev))
+		ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);
+
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	struct io_pgtable_ops *ops = vm->pgtbl_ops;
+	u64 offset = 0;
+
+	drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size);
+
+	while (offset < size) {
+		size_t unmapped_sz = 0, pgcount;
+		size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);
+
+		unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL);
+
+		if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) {
+			drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n",
+				iova + offset + unmapped_sz,
+				iova + offset + pgsize * pgcount,
+				iova, iova + size);
+			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
+			return  -EINVAL;
+		}
+		offset += unmapped_sz;
+	}
+
+	return panthor_vm_flush_range(vm, iova, size);
+}
+
+static int
+panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
+		     struct sg_table *sgt, u64 offset, u64 size)
+{
+	struct panthor_device *ptdev = vm->ptdev;
+	unsigned int count;
+	struct scatterlist *sgl;
+	struct io_pgtable_ops *ops = vm->pgtbl_ops;
+	u64 start_iova = iova;
+	int ret;
+
+	if (!size)
+		return 0;
+
+	for_each_sgtable_dma_sg(sgt, sgl, count) {
+		dma_addr_t paddr = sg_dma_address(sgl);
+		size_t len = sg_dma_len(sgl);
+
+		if (len <= offset) {
+			offset -= len;
+			continue;
+		}
+
+		paddr += offset;
+		len -= offset;
+		len = min_t(size_t, len, size);
+		size -= len;
+
+		drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx",
+			vm->as.id, iova, &paddr, len);
+
+		while (len) {
+			size_t pgcount, mapped = 0;
+			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);
+
+			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
+					     GFP_KERNEL, &mapped);
+			iova += mapped;
+			paddr += mapped;
+			len -= mapped;
+
+			if (drm_WARN_ON(&ptdev->base, !ret && !mapped))
+				ret = -ENOMEM;
+
+			if (ret) {
+				/* If something failed, unmap what we've already mapped before
+				 * returning. The unmap call is not supposed to fail.
+				 */
+				drm_WARN_ON(&ptdev->base,
+					    panthor_vm_unmap_pages(vm, start_iova,
+								   iova - start_iova));
+				return ret;
+			}
+		}
+
+		if (!size)
+			break;
+	}
+
+	return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
+}
+
+static int flags_to_prot(u32 flags)
+{
+	int prot = 0;
+
+	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC)
+		prot |= IOMMU_NOEXEC;
+
+	if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED))
+		prot |= IOMMU_CACHE;
+
+	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY)
+		prot |= IOMMU_READ;
+	else
+		prot |= IOMMU_READ | IOMMU_WRITE;
+
+	return prot;
+}
+
+/**
+ * panthor_vm_alloc_va() - Allocate a region in the auto-va space
+ * @VM: VM to allocate a region on.
+ * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
+ * wants the VA to be automatically allocated from the auto-VA range.
+ * @size: size of the VA range.
+ * @va_node: drm_mm_node to initialize. Must be zero-initialized.
+ *
+ * Some GPU objects, like heap chunks, are fully managed by the kernel and
+ * need to be mapped to the userspace VM, in the region reserved for kernel
+ * objects.
+ *
+ * This function takes care of allocating a region in the kernel auto-VA space.
+ *
+ * Return: 0 on success, an error code otherwise.
+ */
+int
+panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
+		    struct drm_mm_node *va_node)
+{
+	int ret;
+
+	if (!size || (size & ~PAGE_MASK))
+		return -EINVAL;
+
+	if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK))
+		return -EINVAL;
+
+	mutex_lock(&vm->mm_lock);
+	if (va != PANTHOR_VM_KERNEL_AUTO_VA) {
+		va_node->start = va;
+		va_node->size = size;
+		ret = drm_mm_reserve_node(&vm->mm, va_node);
+	} else {
+		ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size,
+						  size >= SZ_2M ? SZ_2M : SZ_4K,
+						  0, vm->kernel_auto_va.start,
+						  vm->kernel_auto_va.end,
+						  DRM_MM_INSERT_BEST);
+	}
+	mutex_unlock(&vm->mm_lock);
+
+	return ret;
+}
+
+/**
+ * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
+ * @VM: VM to free the region on.
+ * @va_node: Memory node representing the region to free.
+ */
+void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
+{
+	mutex_lock(&vm->mm_lock);
+	drm_mm_remove_node(va_node);
+	mutex_unlock(&vm->mm_lock);
+}
+
+static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);
+	struct drm_gpuvm *vm = vm_bo->vm;
+	bool unpin;
+
+	/* We must retain the GEM before calling drm_gpuvm_bo_put(),
+	 * otherwise the mutex might be destroyed while we hold it.
+	 * Same goes for the VM, since we take the VM resv lock.
+	 */
+	drm_gem_object_get(&bo->base.base);
+	drm_gpuvm_get(vm);
+
+	/* We take the resv lock to protect against concurrent accesses to the
+	 * gpuvm evicted/extobj lists that are modified in
+	 * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put()
+	 * releases sthe last vm_bo reference.
+	 * We take the BO GPUVA list lock to protect the vm_bo removal from the
+	 * GEM vm_bo list.
+	 */
+	dma_resv_lock(drm_gpuvm_resv(vm), NULL);
+	mutex_lock(&bo->gpuva_list_lock);
+	unpin = drm_gpuvm_bo_put(vm_bo);
+	mutex_unlock(&bo->gpuva_list_lock);
+	dma_resv_unlock(drm_gpuvm_resv(vm));
+
+	/* If the vm_bo object was destroyed, release the pin reference that
+	 * was hold by this object.
+	 */
+	if (unpin && !bo->base.base.import_attach)
+		drm_gem_shmem_unpin(&bo->base);
+
+	drm_gpuvm_put(vm);
+	drm_gem_object_put(&bo->base.base);
+}
+
+static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
+				      struct panthor_vm *vm)
+{
+	struct panthor_vma *vma, *tmp_vma;
+
+	u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
+				 op_ctx->rsvd_page_tables.ptr;
+
+	if (remaining_pt_count) {
+		kmem_cache_free_bulk(pt_cache, remaining_pt_count,
+				     op_ctx->rsvd_page_tables.pages +
+				     op_ctx->rsvd_page_tables.ptr);
+	}
+
+	kfree(op_ctx->rsvd_page_tables.pages);
+
+	if (op_ctx->map.vm_bo)
+		panthor_vm_bo_put(op_ctx->map.vm_bo);
+
+	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
+		kfree(op_ctx->preallocated_vmas[i]);
+
+	list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) {
+		list_del(&vma->node);
+		panthor_vm_bo_put(vma->base.vm_bo);
+		kfree(vma);
+	}
+}
+
+static struct panthor_vma *
+panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx)
+{
+	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
+		struct panthor_vma *vma = op_ctx->preallocated_vmas[i];
+
+		if (vma) {
+			op_ctx->preallocated_vmas[i] = NULL;
+			return vma;
+		}
+	}
+
+	return NULL;
+}
+
+static int
+panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
+{
+	u32 vma_count;
+
+	switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
+		/* One VMA for the new mapping, and two more VMAs for the remap case
+		 * which might contain both a prev and next VA.
+		 */
+		vma_count = 3;
+		break;
+
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
+		/* Partial unmaps might trigger a remap with either a prev or a next VA,
+		 * but not both.
+		 */
+		vma_count = 1;
+		break;
+
+	default:
+		return 0;
+	}
+
+	for (u32 i = 0; i < vma_count; i++) {
+		struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+
+		if (!vma)
+			return -ENOMEM;
+
+		op_ctx->preallocated_vmas[i] = vma;
+	}
+
+	return 0;
+}
+
+#define PANTHOR_VM_BIND_OP_MAP_FLAGS \
+	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
+	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
+	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \
+	 DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
+
+static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
+					 struct panthor_vm *vm,
+					 struct panthor_gem_object *bo,
+					 u64 offset,
+					 u64 size, u64 va,
+					 u32 flags)
+{
+	struct drm_gpuvm_bo *preallocated_vm_bo;
+	struct sg_table *sgt = NULL;
+	u64 pt_count;
+	int ret;
+
+	if (!bo)
+		return -EINVAL;
+
+	if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) ||
+	    (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
+		return -EINVAL;
+
+	/* Make sure the VA and size are aligned and in-bounds. */
+	if (size > bo->base.base.size || offset > bo->base.base.size - size)
+		return -EINVAL;
+
+	/* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */
+	if (bo->exclusive_vm_root_gem &&
+	    bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm))
+		return -EINVAL;
+
+	memset(op_ctx, 0, sizeof(*op_ctx));
+	INIT_LIST_HEAD(&op_ctx->returned_vmas);
+	op_ctx->flags = flags;
+	op_ctx->va.range = size;
+	op_ctx->va.addr = va;
+
+	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
+	if (ret)
+		goto err_cleanup;
+
+	if (!bo->base.base.import_attach) {
+		/* Pre-reserve the BO pages, so the map operation doesn't have to
+		 * allocate.
+		 */
+		ret = drm_gem_shmem_pin(&bo->base);
+		if (ret)
+			goto err_cleanup;
+	}
+
+	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+	if (IS_ERR(sgt)) {
+		if (!bo->base.base.import_attach)
+			drm_gem_shmem_unpin(&bo->base);
+
+		ret = PTR_ERR(sgt);
+		goto err_cleanup;
+	}
+
+	op_ctx->map.sgt = sgt;
+
+	preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
+	if (!preallocated_vm_bo) {
+		if (!bo->base.base.import_attach)
+			drm_gem_shmem_unpin(&bo->base);
+
+		ret = -ENOMEM;
+		goto err_cleanup;
+	}
+
+	mutex_lock(&bo->gpuva_list_lock);
+	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);
+	mutex_unlock(&bo->gpuva_list_lock);
+
+	/* If the a vm_bo for this <VM,BO> combination exists, it already
+	 * retains a pin ref, and we can release the one we took earlier.
+	 *
+	 * If our pre-allocated vm_bo is picked, it now retains the pin ref,
+	 * which will be released in panthor_vm_bo_put().
+	 */
+	if (preallocated_vm_bo != op_ctx->map.vm_bo &&
+	    !bo->base.base.import_attach)
+		drm_gem_shmem_unpin(&bo->base);
+
+	op_ctx->map.bo_offset = offset;
+
+	/* L1, L2 and L3 page tables.
+	 * We could optimize L3 allocation by iterating over the sgt and merging
+	 * 2M contiguous blocks, but it's simpler to over-provision and return
+	 * the pages if they're not used.
+	 */
+	pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) +
+		   ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) +
+		   ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21);
+
+	op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
+						 sizeof(*op_ctx->rsvd_page_tables.pages),
+						 GFP_KERNEL);
+	if (!op_ctx->rsvd_page_tables.pages)
+		goto err_cleanup;
+
+	ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
+				    op_ctx->rsvd_page_tables.pages);
+	op_ctx->rsvd_page_tables.count = ret;
+	if (ret != pt_count) {
+		ret = -ENOMEM;
+		goto err_cleanup;
+	}
+
+	/* Insert BO into the extobj list last, when we know nothing can fail. */
+	dma_resv_lock(panthor_vm_resv(vm), NULL);
+	drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo);
+	dma_resv_unlock(panthor_vm_resv(vm));
+
+	return 0;
+
+err_cleanup:
+	panthor_vm_cleanup_op_ctx(op_ctx, vm);
+	return ret;
+}
+
+static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
+					   struct panthor_vm *vm,
+					   u64 va, u64 size)
+{
+	u32 pt_count = 0;
+	int ret;
+
+	memset(op_ctx, 0, sizeof(*op_ctx));
+	INIT_LIST_HEAD(&op_ctx->returned_vmas);
+	op_ctx->va.range = size;
+	op_ctx->va.addr = va;
+	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;
+
+	/* Pre-allocate L3 page tables to account for the split-2M-block
+	 * situation on unmap.
+	 */
+	if (va != ALIGN(va, SZ_2M))
+		pt_count++;
+
+	if (va + size != ALIGN(va + size, SZ_2M) &&
+	    ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M))
+		pt_count++;
+
+	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
+	if (ret)
+		goto err_cleanup;
+
+	if (pt_count) {
+		op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
+							 sizeof(*op_ctx->rsvd_page_tables.pages),
+							 GFP_KERNEL);
+		if (!op_ctx->rsvd_page_tables.pages)
+			goto err_cleanup;
+
+		ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
+					    op_ctx->rsvd_page_tables.pages);
+		if (ret != pt_count) {
+			ret = -ENOMEM;
+			goto err_cleanup;
+		}
+		op_ctx->rsvd_page_tables.count = pt_count;
+	}
+
+	return 0;
+
+err_cleanup:
+	panthor_vm_cleanup_op_ctx(op_ctx, vm);
+	return ret;
+}
+
+static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx,
+						struct panthor_vm *vm)
+{
+	memset(op_ctx, 0, sizeof(*op_ctx));
+	INIT_LIST_HEAD(&op_ctx->returned_vmas);
+	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
+}
+
+/**
+ * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address
+ * @vm: VM to look into.
+ * @va: Virtual address to search for.
+ * @bo_offset: Offset of the GEM object mapped at this virtual address.
+ * Only valid on success.
+ *
+ * The object returned by this function might no longer be mapped when the
+ * function returns. It's the caller responsibility to ensure there's no
+ * concurrent map/unmap operations making the returned value invalid, or
+ * make sure it doesn't matter if the object is no longer mapped.
+ *
+ * Return: A valid pointer on success, an ERR_PTR() otherwise.
+ */
+struct panthor_gem_object *
+panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset)
+{
+	struct panthor_gem_object *bo = ERR_PTR(-ENOENT);
+	struct drm_gpuva *gpuva;
+	struct panthor_vma *vma;
+
+	/* Take the VM lock to prevent concurrent map/unmap operations. */
+	mutex_lock(&vm->op_lock);
+	gpuva = drm_gpuva_find_first(&vm->base, va, 1);
+	vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL;
+	if (vma && vma->base.gem.obj) {
+		drm_gem_object_get(vma->base.gem.obj);
+		bo = to_panthor_bo(vma->base.gem.obj);
+		*bo_offset = vma->base.gem.offset + (va - vma->base.va.addr);
+	}
+	mutex_unlock(&vm->op_lock);
+
+	return bo;
+}
+
+#define PANTHOR_VM_MIN_KERNEL_VA_SIZE	SZ_256M
+
+static u64
+panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args,
+				    u64 full_va_range)
+{
+	u64 user_va_range;
+
+	/* Make sure we have a minimum amount of VA space for kernel objects. */
+	if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE)
+		return 0;
+
+	if (args->user_va_range) {
+		/* Use the user provided value if != 0. */
+		user_va_range = args->user_va_range;
+	} else if (TASK_SIZE_OF(current) < full_va_range) {
+		/* If the task VM size is smaller than the GPU VA range, pick this
+		 * as our default user VA range, so userspace can CPU/GPU map buffers
+		 * at the same address.
+		 */
+		user_va_range = TASK_SIZE_OF(current);
+	} else {
+		/* If the GPU VA range is smaller than the task VM size, we
+		 * just have to live with the fact we won't be able to map
+		 * all buffers at the same GPU/CPU address.
+		 *
+		 * If the GPU VA range is bigger than 4G (more than 32-bit of
+		 * VA), we split the range in two, and assign half of it to
+		 * the user and the other half to the kernel, if it's not, we
+		 * keep the kernel VA space as small as possible.
+		 */
+		user_va_range = full_va_range > SZ_4G ?
+				full_va_range / 2 :
+				full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;
+	}
+
+	if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range)
+		user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;
+
+	return user_va_range;
+}
+
+#define PANTHOR_VM_CREATE_FLAGS		0
+
+static int
+panthor_vm_create_check_args(const struct panthor_device *ptdev,
+			     const struct drm_panthor_vm_create *args,
+			     u64 *kernel_va_start, u64 *kernel_va_range)
+{
+	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
+	u64 full_va_range = 1ull << va_bits;
+	u64 user_va_range;
+
+	if (args->flags & ~PANTHOR_VM_CREATE_FLAGS)
+		return -EINVAL;
+
+	user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range);
+	if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range))
+		return -EINVAL;
+
+	/* Pick a kernel VA range that's a power of two, to have a clear split. */
+	*kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range);
+	*kernel_va_start = full_va_range - *kernel_va_range;
+	return 0;
+}
+
+/*
+ * Only 32 VMs per open file. If that becomes a limiting factor, we can
+ * increase this number.
+ */
+#define PANTHOR_MAX_VMS_PER_FILE	32
+
+/**
+ * panthor_vm_pool_create_vm() - Create a VM
+ * @pool: The VM to create this VM on.
+ * @kernel_va_start: Start of the region reserved for kernel objects.
+ * @kernel_va_range: Size of the region reserved for kernel objects.
+ *
+ * Return: a positive VM ID on success, a negative error code otherwise.
+ */
+int panthor_vm_pool_create_vm(struct panthor_device *ptdev,
+			      struct panthor_vm_pool *pool,
+			      struct drm_panthor_vm_create *args)
+{
+	u64 kernel_va_start, kernel_va_range;
+	struct panthor_vm *vm;
+	int ret;
+	u32 id;
+
+	ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range);
+	if (ret)
+		return ret;
+
+	vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range,
+			       kernel_va_start, kernel_va_range);
+	if (IS_ERR(vm))
+		return PTR_ERR(vm);
+
+	ret = xa_alloc(&pool->xa, &id, vm,
+		       XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL);
+
+	if (ret) {
+		panthor_vm_put(vm);
+		return ret;
+	}
+
+	args->user_va_range = kernel_va_start;
+	return id;
+}
+
+static void panthor_vm_destroy(struct panthor_vm *vm)
+{
+	if (!vm)
+		return;
+
+	vm->destroyed = true;
+
+	mutex_lock(&vm->heaps.lock);
+	panthor_heap_pool_destroy(vm->heaps.pool);
+	vm->heaps.pool = NULL;
+	mutex_unlock(&vm->heaps.lock);
+
+	drm_WARN_ON(&vm->ptdev->base,
+		    panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range));
+	panthor_vm_put(vm);
+}
+
+/**
+ * panthor_vm_pool_destroy_vm() - Destroy a VM.
+ * @pool: VM pool.
+ * @handle: VM handle.
+ *
+ * This function doesn't free the VM object or its resources, it just kills
+ * all mappings, and makes sure nothing can be mapped after that point.
+ *
+ * If there was any active jobs at the time this function is called, these
+ * jobs should experience page faults and be killed as a result.
+ *
+ * The VM resources are freed when the last reference on the VM object is
+ * dropped.
+ */
+int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle)
+{
+	struct panthor_vm *vm;
+
+	vm = xa_erase(&pool->xa, handle);
+
+	panthor_vm_destroy(vm);
+
+	return vm ? 0 : -EINVAL;
+}
+
+/**
+ * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle
+ * @pool: VM pool to check.
+ * @handle: Handle of the VM to retrieve.
+ *
+ * Return: A valid pointer if the VM exists, NULL otherwise.
+ */
+struct panthor_vm *
+panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle)
+{
+	struct panthor_vm *vm;
+
+	vm = panthor_vm_get(xa_load(&pool->xa, handle));
+
+	return vm;
+}
+
+/**
+ * panthor_vm_pool_destroy() - Destroy a VM pool.
+ * @pfile: File.
+ *
+ * Destroy all VMs in the pool, and release the pool resources.
+ *
+ * Note that VMs can outlive the pool they were created from if other
+ * objects hold a reference to there VMs.
+ */
+void panthor_vm_pool_destroy(struct panthor_file *pfile)
+{
+	struct panthor_vm *vm;
+	unsigned long i;
+
+	if (!pfile->vms)
+		return;
+
+	xa_for_each(&pfile->vms->xa, i, vm)
+		panthor_vm_destroy(vm);
+
+	xa_destroy(&pfile->vms->xa);
+	kfree(pfile->vms);
+}
+
+/**
+ * panthor_vm_pool_create() - Create a VM pool
+ * @pfile: File.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_pool_create(struct panthor_file *pfile)
+{
+	pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL);
+	if (!pfile->vms)
+		return -ENOMEM;
+
+	xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1);
+	return 0;
+}
+
+/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */
+static void mmu_tlb_flush_all(void *cookie)
+{
+}
+
+static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie)
+{
+}
+
+static const struct iommu_flush_ops mmu_tlb_ops = {
+	.tlb_flush_all = mmu_tlb_flush_all,
+	.tlb_flush_walk = mmu_tlb_flush_walk,
+};
+
+static const char *access_type_name(struct panthor_device *ptdev,
+				    u32 fault_status)
+{
+	switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) {
+	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
+		return "ATOMIC";
+	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
+		return "READ";
+	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
+		return "WRITE";
+	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
+		return "EXECUTE";
+	default:
+		drm_WARN_ON(&ptdev->base, 1);
+		return NULL;
+	}
+}
+
+static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status)
+{
+	bool has_unhandled_faults = false;
+
+	status = panthor_mmu_fault_mask(ptdev, status);
+	while (status) {
+		u32 as = ffs(status | (status >> 16)) - 1;
+		u32 mask = panthor_mmu_as_fault_mask(ptdev, as);
+		u32 new_int_mask;
+		u64 addr;
+		u32 fault_status;
+		u32 exception_type;
+		u32 access_type;
+		u32 source_id;
+
+		fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as));
+		addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as));
+		addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32;
+
+		/* decode the fault status */
+		exception_type = fault_status & 0xFF;
+		access_type = (fault_status >> 8) & 0x3;
+		source_id = (fault_status >> 16);
+
+		mutex_lock(&ptdev->mmu->as.slots_lock);
+
+		ptdev->mmu->as.faulty_mask |= mask;
+		new_int_mask =
+			panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask);
+
+		/* terminal fault, print info about the fault */
+		drm_err(&ptdev->base,
+			"Unhandled Page fault in AS%d at VA 0x%016llX\n"
+			"raw fault status: 0x%X\n"
+			"decoded fault status: %s\n"
+			"exception type 0x%X: %s\n"
+			"access type 0x%X: %s\n"
+			"source id 0x%X\n",
+			as, addr,
+			fault_status,
+			(fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
+			exception_type, panthor_exception_name(ptdev, exception_type),
+			access_type, access_type_name(ptdev, fault_status),
+			source_id);
+
+		/* Ignore MMU interrupts on this AS until it's been
+		 * re-enabled.
+		 */
+		ptdev->mmu->irq.mask = new_int_mask;
+		gpu_write(ptdev, MMU_INT_MASK, new_int_mask);
+
+		if (ptdev->mmu->as.slots[as].vm)
+			ptdev->mmu->as.slots[as].vm->unhandled_fault = true;
+
+		/* Disable the MMU to kill jobs on this AS. */
+		panthor_mmu_as_disable(ptdev, as);
+		mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+		status &= ~mask;
+		has_unhandled_faults = true;
+	}
+
+	if (has_unhandled_faults)
+		panthor_sched_report_mmu_fault(ptdev);
+}
+PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler);
+
+/**
+ * panthor_mmu_suspend() - Suspend the MMU logic
+ * @ptdev: Device.
+ *
+ * All we do here is de-assign the AS slots on all active VMs, so things
+ * get flushed to the main memory, and no further access to these VMs are
+ * possible.
+ *
+ * We also suspend the MMU IRQ.
+ */
+void panthor_mmu_suspend(struct panthor_device *ptdev)
+{
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
+		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
+
+		if (vm) {
+			drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i));
+			panthor_vm_release_as_locked(vm);
+		}
+	}
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+}
+
+/**
+ * panthor_mmu_resume() - Resume the MMU logic
+ * @ptdev: Device.
+ *
+ * Resume the IRQ.
+ *
+ * We don't re-enable previously active VMs. We assume other parts of the
+ * driver will call panthor_vm_active() on the VMs they intend to use.
+ */
+void panthor_mmu_resume(struct panthor_device *ptdev)
+{
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	ptdev->mmu->as.alloc_mask = 0;
+	ptdev->mmu->as.faulty_mask = 0;
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+}
+
+/**
+ * panthor_mmu_pre_reset() - Prepare for a reset
+ * @ptdev: Device.
+ *
+ * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we
+ * don't get asked to do a VM operation while the GPU is down.
+ *
+ * We don't cleanly shutdown the AS slots here, because the reset might
+ * come from an AS_ACTIVE_BIT stuck situation.
+ */
+void panthor_mmu_pre_reset(struct panthor_device *ptdev)
+{
+	struct panthor_vm *vm;
+
+	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+
+	mutex_lock(&ptdev->mmu->vm.lock);
+	ptdev->mmu->vm.reset_in_progress = true;
+	list_for_each_entry(vm, &ptdev->mmu->vm.list, node)
+		panthor_vm_stop(vm);
+	mutex_unlock(&ptdev->mmu->vm.lock);
+}
+
+/**
+ * panthor_mmu_post_reset() - Restore things after a reset
+ * @ptdev: Device.
+ *
+ * Put the MMU logic back in action after a reset. That implies resuming the
+ * IRQ and re-enabling the VM_BIND queues.
+ */
+void panthor_mmu_post_reset(struct panthor_device *ptdev)
+{
+	struct panthor_vm *vm;
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+
+	/* Now that the reset is effective, we can assume that none of the
+	 * AS slots are setup, and clear the faulty flags too.
+	 */
+	ptdev->mmu->as.alloc_mask = 0;
+	ptdev->mmu->as.faulty_mask = 0;
+
+	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
+		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
+
+		if (vm)
+			panthor_vm_release_as_locked(vm);
+	}
+
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
+
+	/* Restart the VM_BIND queues. */
+	mutex_lock(&ptdev->mmu->vm.lock);
+	list_for_each_entry(vm, &ptdev->mmu->vm.list, node) {
+		panthor_vm_start(vm);
+	}
+	ptdev->mmu->vm.reset_in_progress = false;
+	mutex_unlock(&ptdev->mmu->vm.lock);
+}
+
+static void panthor_vm_free(struct drm_gpuvm *gpuvm)
+{
+	struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base);
+	struct panthor_device *ptdev = vm->ptdev;
+
+	mutex_lock(&vm->heaps.lock);
+	if (drm_WARN_ON(&ptdev->base, vm->heaps.pool))
+		panthor_heap_pool_destroy(vm->heaps.pool);
+	mutex_unlock(&vm->heaps.lock);
+	mutex_destroy(&vm->heaps.lock);
+
+	mutex_lock(&ptdev->mmu->vm.lock);
+	list_del(&vm->node);
+	/* Restore the scheduler state so we can call drm_sched_entity_destroy()
+	 * and drm_sched_fini(). If get there, that means we have no job left
+	 * and no new jobs can be queued, so we can start the scheduler without
+	 * risking interfering with the reset.
+	 */
+	if (ptdev->mmu->vm.reset_in_progress)
+		panthor_vm_start(vm);
+	mutex_unlock(&ptdev->mmu->vm.lock);
+
+	drm_sched_entity_destroy(&vm->entity);
+	drm_sched_fini(&vm->sched);
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	if (vm->as.id >= 0) {
+		int cookie;
+
+		if (drm_dev_enter(&ptdev->base, &cookie)) {
+			panthor_mmu_as_disable(ptdev, vm->as.id);
+			drm_dev_exit(cookie);
+		}
+
+		ptdev->mmu->as.slots[vm->as.id].vm = NULL;
+		clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
+		list_del(&vm->as.lru_node);
+	}
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+
+	free_io_pgtable_ops(vm->pgtbl_ops);
+
+	drm_mm_takedown(&vm->mm);
+	kfree(vm);
+}
+
+/**
+ * panthor_vm_put() - Release a reference on a VM
+ * @vm: VM to release the reference on. Can be NULL.
+ */
+void panthor_vm_put(struct panthor_vm *vm)
+{
+	drm_gpuvm_put(vm ? &vm->base : NULL);
+}
+
+/**
+ * panthor_vm_get() - Get a VM reference
+ * @vm: VM to get the reference on. Can be NULL.
+ *
+ * Return: @vm value.
+ */
+struct panthor_vm *panthor_vm_get(struct panthor_vm *vm)
+{
+	if (vm)
+		drm_gpuvm_get(&vm->base);
+
+	return vm;
+}
+
+/**
+ * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM
+ * @vm: VM to query the heap pool on.
+ * @create: True if the heap pool should be created when it doesn't exist.
+ *
+ * Heap pools are per-VM. This function allows one to retrieve the heap pool
+ * attached to a VM.
+ *
+ * If no heap pool exists yet, and @create is true, we create one.
+ *
+ * The returned panthor_heap_pool should be released with panthor_heap_pool_put().
+ *
+ * Return: A valid pointer on success, an ERR_PTR() otherwise.
+ */
+struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create)
+{
+	struct panthor_heap_pool *pool;
+
+	mutex_lock(&vm->heaps.lock);
+	if (!vm->heaps.pool && create) {
+		if (vm->destroyed)
+			pool = ERR_PTR(-EINVAL);
+		else
+			pool = panthor_heap_pool_create(vm->ptdev, vm);
+
+		if (!IS_ERR(pool))
+			vm->heaps.pool = panthor_heap_pool_get(pool);
+	} else {
+		pool = panthor_heap_pool_get(vm->heaps.pool);
+	}
+	mutex_unlock(&vm->heaps.lock);
+
+	return pool;
+}
+
+static u64 mair_to_memattr(u64 mair)
+{
+	u64 memattr = 0;
+	u32 i;
+
+	for (i = 0; i < 8; i++) {
+		u8 in_attr = mair >> (8 * i), out_attr;
+		u8 outer = in_attr >> 4, inner = in_attr & 0xf;
+
+		/* For caching to be enabled, inner and outer caching policy
+		 * have to be both write-back, if one of them is write-through
+		 * or non-cacheable, we just choose non-cacheable. Device
+		 * memory is also translated to non-cacheable.
+		 */
+		if (!(outer & 3) || !(outer & 4) || !(inner & 4)) {
+			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC |
+				   AS_MEMATTR_AARCH64_SH_MIDGARD_INNER |
+				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false);
+		} else {
+			/* Use SH_CPU_INNER mode so SH_IS, which is used when
+			 * IOMMU_CACHE is set, actually maps to the standard
+			 * definition of inner-shareable and not Mali's
+			 * internal-shareable mode.
+			 */
+			out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB |
+				   AS_MEMATTR_AARCH64_SH_CPU_INNER |
+				   AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2);
+		}
+
+		memattr |= (u64)out_attr << (8 * i);
+	}
+
+	return memattr;
+}
+
+static void panthor_vma_link(struct panthor_vm *vm,
+			     struct panthor_vma *vma,
+			     struct drm_gpuvm_bo *vm_bo)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
+
+	mutex_lock(&bo->gpuva_list_lock);
+	drm_gpuva_link(&vma->base, vm_bo);
+	drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo));
+	mutex_unlock(&bo->gpuva_list_lock);
+}
+
+static void panthor_vma_unlink(struct panthor_vm *vm,
+			       struct panthor_vma *vma)
+{
+	struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
+	struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo);
+
+	mutex_lock(&bo->gpuva_list_lock);
+	drm_gpuva_unlink(&vma->base);
+	mutex_unlock(&bo->gpuva_list_lock);
+
+	/* drm_gpuva_unlink() release the vm_bo, but we manually retained it
+	 * when entering this function, so we can implement deferred VMA
+	 * destruction. Re-assign it here.
+	 */
+	vma->base.vm_bo = vm_bo;
+	list_add_tail(&vma->node, &vm->op_ctx->returned_vmas);
+}
+
+static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
+{
+	INIT_LIST_HEAD(&vma->node);
+	vma->flags = flags;
+}
+
+#define PANTHOR_VM_MAP_FLAGS \
+	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
+	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
+	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)
+
+static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv)
+{
+	struct panthor_vm *vm = priv;
+	struct panthor_vm_op_ctx *op_ctx = vm->op_ctx;
+	struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx);
+	int ret;
+
+	if (!vma)
+		return -EINVAL;
+
+	panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS);
+
+	ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
+				   op_ctx->map.sgt, op->map.gem.offset,
+				   op->map.va.range);
+	if (ret)
+		return ret;
+
+	/* Ref owned by the mapping now, clear the obj field so we don't release the
+	 * pinning/obj ref behind GPUVA's back.
+	 */
+	drm_gpuva_map(&vm->base, &vma->base, &op->map);
+	panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
+	op_ctx->map.vm_bo = NULL;
+	return 0;
+}
+
+static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op,
+				       void *priv)
+{
+	struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base);
+	struct panthor_vm *vm = priv;
+	struct panthor_vm_op_ctx *op_ctx = vm->op_ctx;
+	struct panthor_vma *prev_vma = NULL, *next_vma = NULL;
+	u64 unmap_start, unmap_range;
+	int ret;
+
+	drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range);
+	ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range);
+	if (ret)
+		return ret;
+
+	if (op->remap.prev) {
+		prev_vma = panthor_vm_op_ctx_get_vma(op_ctx);
+		panthor_vma_init(prev_vma, unmap_vma->flags);
+	}
+
+	if (op->remap.next) {
+		next_vma = panthor_vm_op_ctx_get_vma(op_ctx);
+		panthor_vma_init(next_vma, unmap_vma->flags);
+	}
+
+	drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL,
+			next_vma ? &next_vma->base : NULL,
+			&op->remap);
+
+	if (prev_vma) {
+		/* panthor_vma_link() transfers the vm_bo ownership to
+		 * the VMA object. Since the vm_bo we're passing is still
+		 * owned by the old mapping which will be released when this
+		 * mapping is destroyed, we need to grab a ref here.
+		 */
+		panthor_vma_link(vm, prev_vma,
+				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+	}
+
+	if (next_vma) {
+		panthor_vma_link(vm, next_vma,
+				 drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo));
+	}
+
+	panthor_vma_unlink(vm, unmap_vma);
+	return 0;
+}
+
+static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op,
+				       void *priv)
+{
+	struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base);
+	struct panthor_vm *vm = priv;
+	int ret;
+
+	ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr,
+				     unmap_vma->base.va.range);
+	if (drm_WARN_ON(&vm->ptdev->base, ret))
+		return ret;
+
+	drm_gpuva_unmap(&op->unmap);
+	panthor_vma_unlink(vm, unmap_vma);
+	return 0;
+}
+
+static const struct drm_gpuvm_ops panthor_gpuvm_ops = {
+	.vm_free = panthor_vm_free,
+	.sm_step_map = panthor_gpuva_sm_step_map,
+	.sm_step_remap = panthor_gpuva_sm_step_remap,
+	.sm_step_unmap = panthor_gpuva_sm_step_unmap,
+};
+
+/**
+ * panthor_vm_resv() - Get the dma_resv object attached to a VM.
+ * @vm: VM to get the dma_resv of.
+ *
+ * Return: A dma_resv object.
+ */
+struct dma_resv *panthor_vm_resv(struct panthor_vm *vm)
+{
+	return drm_gpuvm_resv(&vm->base);
+}
+
+struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm)
+{
+	if (!vm)
+		return NULL;
+
+	return vm->base.r_obj;
+}
+
+static int
+panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op,
+		   bool flag_vm_unusable_on_failure)
+{
+	u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK;
+	int ret;
+
+	if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY)
+		return 0;
+
+	mutex_lock(&vm->op_lock);
+	vm->op_ctx = op;
+	switch (op_type) {
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
+		if (vm->unusable) {
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range,
+				       op->map.vm_bo->obj, op->map.bo_offset);
+		break;
+
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
+		ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range);
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret && flag_vm_unusable_on_failure)
+		vm->unusable = true;
+
+	vm->op_ctx = NULL;
+	mutex_unlock(&vm->op_lock);
+
+	return ret;
+}
+
+static struct dma_fence *
+panthor_vm_bind_run_job(struct drm_sched_job *sched_job)
+{
+	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
+	bool cookie;
+	int ret;
+
+	/* Not only we report an error whose result is propagated to the
+	 * drm_sched finished fence, but we also flag the VM as unusable, because
+	 * a failure in the async VM_BIND results in an inconsistent state. VM needs
+	 * to be destroyed and recreated.
+	 */
+	cookie = dma_fence_begin_signalling();
+	ret = panthor_vm_exec_op(job->vm, &job->ctx, true);
+	dma_fence_end_signalling(cookie);
+
+	return ret ? ERR_PTR(ret) : NULL;
+}
+
+static void panthor_vm_bind_job_release(struct kref *kref)
+{
+	struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount);
+
+	if (job->base.s_fence)
+		drm_sched_job_cleanup(&job->base);
+
+	panthor_vm_cleanup_op_ctx(&job->ctx, job->vm);
+	panthor_vm_put(job->vm);
+	kfree(job);
+}
+
+/**
+ * panthor_vm_bind_job_put() - Release a VM_BIND job reference
+ * @sched_job: Job to release the reference on.
+ */
+void panthor_vm_bind_job_put(struct drm_sched_job *sched_job)
+{
+	struct panthor_vm_bind_job *job =
+		container_of(sched_job, struct panthor_vm_bind_job, base);
+
+	if (sched_job)
+		kref_put(&job->refcount, panthor_vm_bind_job_release);
+}
+
+static void
+panthor_vm_bind_free_job(struct drm_sched_job *sched_job)
+{
+	struct panthor_vm_bind_job *job =
+		container_of(sched_job, struct panthor_vm_bind_job, base);
+
+	drm_sched_job_cleanup(sched_job);
+
+	/* Do the heavy cleanups asynchronously, so we're out of the
+	 * dma-signaling path and can acquire dma-resv locks safely.
+	 */
+	queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work);
+}
+
+static enum drm_gpu_sched_stat
+panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job)
+{
+	WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static const struct drm_sched_backend_ops panthor_vm_bind_ops = {
+	.run_job = panthor_vm_bind_run_job,
+	.free_job = panthor_vm_bind_free_job,
+	.timedout_job = panthor_vm_bind_timedout_job,
+};
+
+/**
+ * panthor_vm_create() - Create a VM
+ * @ptdev: Device.
+ * @for_mcu: True if this is the FW MCU VM.
+ * @kernel_va_start: Start of the range reserved for kernel BO mapping.
+ * @kernel_va_size: Size of the range reserved for kernel BO mapping.
+ * @auto_kernel_va_start: Start of the auto-VA kernel range.
+ * @auto_kernel_va_size: Size of the auto-VA kernel range.
+ *
+ * Return: A valid pointer on success, an ERR_PTR() otherwise.
+ */
+struct panthor_vm *
+panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
+		  u64 kernel_va_start, u64 kernel_va_size,
+		  u64 auto_kernel_va_start, u64 auto_kernel_va_size)
+{
+	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
+	u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features);
+	u64 full_va_range = 1ull << va_bits;
+	struct drm_gem_object *dummy_gem;
+	struct drm_gpu_scheduler *sched;
+	struct io_pgtable_cfg pgtbl_cfg;
+	u64 mair, min_va, va_range;
+	struct panthor_vm *vm;
+	int ret;
+
+	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+	if (!vm)
+		return ERR_PTR(-ENOMEM);
+
+	/* We allocate a dummy GEM for the VM. */
+	dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base);
+	if (!dummy_gem) {
+		ret = -ENOMEM;
+		goto err_free_vm;
+	}
+
+	mutex_init(&vm->heaps.lock);
+	vm->for_mcu = for_mcu;
+	vm->ptdev = ptdev;
+	mutex_init(&vm->op_lock);
+
+	if (for_mcu) {
+		/* CSF MCU is a cortex M7, and can only address 4G */
+		min_va = 0;
+		va_range = SZ_4G;
+	} else {
+		min_va = 0;
+		va_range = full_va_range;
+	}
+
+	mutex_init(&vm->mm_lock);
+	drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size);
+	vm->kernel_auto_va.start = auto_kernel_va_start;
+	vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
+
+	INIT_LIST_HEAD(&vm->node);
+	INIT_LIST_HEAD(&vm->as.lru_node);
+	vm->as.id = -1;
+	refcount_set(&vm->as.active_cnt, 0);
+
+	pgtbl_cfg = (struct io_pgtable_cfg) {
+		.pgsize_bitmap	= SZ_4K | SZ_2M,
+		.ias		= va_bits,
+		.oas		= pa_bits,
+		.coherent_walk	= ptdev->coherent,
+		.tlb		= &mmu_tlb_ops,
+		.iommu_dev	= ptdev->base.dev,
+		.alloc		= alloc_pt,
+		.free		= free_pt,
+	};
+
+	vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm);
+	if (!vm->pgtbl_ops) {
+		ret = -EINVAL;
+		goto err_mm_takedown;
+	}
+
+	/* Bind operations are synchronous for now, no timeout needed. */
+	ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq,
+			     1, 1, 0,
+			     MAX_SCHEDULE_TIMEOUT, NULL, NULL,
+			     "panthor-vm-bind", ptdev->base.dev);
+	if (ret)
+		goto err_free_io_pgtable;
+
+	sched = &vm->sched;
+	ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL);
+	if (ret)
+		goto err_sched_fini;
+
+	mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair;
+	vm->memattr = mair_to_memattr(mair);
+
+	mutex_lock(&ptdev->mmu->vm.lock);
+	list_add_tail(&vm->node, &ptdev->mmu->vm.list);
+
+	/* If a reset is in progress, stop the scheduler. */
+	if (ptdev->mmu->vm.reset_in_progress)
+		panthor_vm_stop(vm);
+	mutex_unlock(&ptdev->mmu->vm.lock);
+
+	/* We intentionally leave the reserved range to zero, because we want kernel VMAs
+	 * to be handled the same way user VMAs are.
+	 */
+	drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM",
+		       DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem,
+		       min_va, va_range, 0, 0, &panthor_gpuvm_ops);
+	drm_gem_object_put(dummy_gem);
+	return vm;
+
+err_sched_fini:
+	drm_sched_fini(&vm->sched);
+
+err_free_io_pgtable:
+	free_io_pgtable_ops(vm->pgtbl_ops);
+
+err_mm_takedown:
+	drm_mm_takedown(&vm->mm);
+	drm_gem_object_put(dummy_gem);
+
+err_free_vm:
+	kfree(vm);
+	return ERR_PTR(ret);
+}
+
+static int
+panthor_vm_bind_prepare_op_ctx(struct drm_file *file,
+			       struct panthor_vm *vm,
+			       const struct drm_panthor_vm_bind_op *op,
+			       struct panthor_vm_op_ctx *op_ctx)
+{
+	struct drm_gem_object *gem;
+	int ret;
+
+	/* Aligned on page size. */
+	if ((op->va | op->size) & ~PAGE_MASK)
+		return -EINVAL;
+
+	switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
+		gem = drm_gem_object_lookup(file, op->bo_handle);
+		ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm,
+						    gem ? to_panthor_bo(gem) : NULL,
+						    op->bo_offset,
+						    op->size,
+						    op->va,
+						    op->flags);
+		drm_gem_object_put(gem);
+		return ret;
+
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
+		if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
+			return -EINVAL;
+
+		if (op->bo_handle || op->bo_offset)
+			return -EINVAL;
+
+		return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size);
+
+	case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY:
+		if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)
+			return -EINVAL;
+
+		if (op->bo_handle || op->bo_offset)
+			return -EINVAL;
+
+		if (op->va || op->size)
+			return -EINVAL;
+
+		if (!op->syncs.count)
+			return -EINVAL;
+
+		panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm);
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work)
+{
+	struct panthor_vm_bind_job *job =
+		container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work);
+
+	panthor_vm_bind_job_put(&job->base);
+}
+
+/**
+ * panthor_vm_bind_job_create() - Create a VM_BIND job
+ * @file: File.
+ * @vm: VM targeted by the VM_BIND job.
+ * @op: VM operation data.
+ *
+ * Return: A valid pointer on success, an ERR_PTR() otherwise.
+ */
+struct drm_sched_job *
+panthor_vm_bind_job_create(struct drm_file *file,
+			   struct panthor_vm *vm,
+			   const struct drm_panthor_vm_bind_op *op)
+{
+	struct panthor_vm_bind_job *job;
+	int ret;
+
+	if (!vm)
+		return ERR_PTR(-EINVAL);
+
+	if (vm->destroyed || vm->unusable)
+		return ERR_PTR(-EINVAL);
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx);
+	if (ret) {
+		kfree(job);
+		return ERR_PTR(ret);
+	}
+
+	INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work);
+	kref_init(&job->refcount);
+	job->vm = panthor_vm_get(vm);
+
+	ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm);
+	if (ret)
+		goto err_put_job;
+
+	return &job->base;
+
+err_put_job:
+	panthor_vm_bind_job_put(&job->base);
+	return ERR_PTR(ret);
+}
+
+/**
+ * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs
+ * @exec: The locking/preparation context.
+ * @sched_job: The job to prepare resvs on.
+ *
+ * Locks and prepare the VM resv.
+ *
+ * If this is a map operation, locks and prepares the GEM resv.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec,
+				      struct drm_sched_job *sched_job)
+{
+	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
+	int ret;
+
+	/* Acquire the VM lock an reserve a slot for this VM bind job. */
+	ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1);
+	if (ret)
+		return ret;
+
+	if (job->ctx.map.vm_bo) {
+		/* Lock/prepare the GEM being mapped. */
+		ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job
+ * @exec: drm_exec context.
+ * @sched_job: Job to update the resvs on.
+ */
+void panthor_vm_bind_job_update_resvs(struct drm_exec *exec,
+				      struct drm_sched_job *sched_job)
+{
+	struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base);
+
+	/* Explicit sync => we just register our job finished fence as bookkeep. */
+	drm_gpuvm_resv_add_fence(&job->vm->base, exec,
+				 &sched_job->s_fence->finished,
+				 DMA_RESV_USAGE_BOOKKEEP,
+				 DMA_RESV_USAGE_BOOKKEEP);
+}
+
+void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec,
+			     struct dma_fence *fence,
+			     enum dma_resv_usage private_usage,
+			     enum dma_resv_usage extobj_usage)
+{
+	drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage);
+}
+
+/**
+ * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously.
+ * @file: File.
+ * @vm: VM targeted by the VM operation.
+ * @op: Data describing the VM operation.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_bind_exec_sync_op(struct drm_file *file,
+				 struct panthor_vm *vm,
+				 struct drm_panthor_vm_bind_op *op)
+{
+	struct panthor_vm_op_ctx op_ctx;
+	int ret;
+
+	/* No sync objects allowed on synchronous operations. */
+	if (op->syncs.count)
+		return -EINVAL;
+
+	if (!op->size)
+		return 0;
+
+	ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx);
+	if (ret)
+		return ret;
+
+	ret = panthor_vm_exec_op(vm, &op_ctx, false);
+	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
+
+	return ret;
+}
+
+/**
+ * panthor_vm_map_bo_range() - Map a GEM object range to a VM
+ * @vm: VM to map the GEM to.
+ * @bo: GEM object to map.
+ * @offset: Offset in the GEM object.
+ * @size: Size to map.
+ * @va: Virtual address to map the object to.
+ * @flags: Combination of drm_panthor_vm_bind_op_flags flags.
+ * Only map-related flags are valid.
+ *
+ * Internal use only. For userspace requests, use
+ * panthor_vm_bind_exec_sync_op() instead.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo,
+			    u64 offset, u64 size, u64 va, u32 flags)
+{
+	struct panthor_vm_op_ctx op_ctx;
+	int ret;
+
+	ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
+	if (ret)
+		return ret;
+
+	ret = panthor_vm_exec_op(vm, &op_ctx, false);
+	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
+
+	return ret;
+}
+
+/**
+ * panthor_vm_unmap_range() - Unmap a portion of the VA space
+ * @vm: VM to unmap the region from.
+ * @va: Virtual address to unmap. Must be 4k aligned.
+ * @size: Size of the region to unmap. Must be 4k aligned.
+ *
+ * Internal use only. For userspace requests, use
+ * panthor_vm_bind_exec_sync_op() instead.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size)
+{
+	struct panthor_vm_op_ctx op_ctx;
+	int ret;
+
+	ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size);
+	if (ret)
+		return ret;
+
+	ret = panthor_vm_exec_op(vm, &op_ctx, false);
+	panthor_vm_cleanup_op_ctx(&op_ctx, vm);
+
+	return ret;
+}
+
+/**
+ * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs.
+ * @exec: Locking/preparation context.
+ * @vm: VM targeted by the GPU job.
+ * @slot_count: Number of slots to reserve.
+ *
+ * GPU jobs assume all BOs bound to the VM at the time the job is submitted
+ * are available when the job is executed. In order to guarantee that, we
+ * need to reserve a slot on all BOs mapped to a VM and update this slot with
+ * the job fence after its submission.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm,
+					u32 slot_count)
+{
+	int ret;
+
+	/* Acquire the VM lock and reserve a slot for this GPU job. */
+	ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count);
+	if (ret)
+		return ret;
+
+	return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
+}
+
+/**
+ * panthor_mmu_unplug() - Unplug the MMU logic
+ * @ptdev: Device.
+ *
+ * No access to the MMU regs should be done after this function is called.
+ * We suspend the IRQ and disable all VMs to guarantee that.
+ */
+void panthor_mmu_unplug(struct panthor_device *ptdev)
+{
+	panthor_mmu_irq_suspend(&ptdev->mmu->irq);
+
+	mutex_lock(&ptdev->mmu->as.slots_lock);
+	for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
+		struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
+
+		if (vm) {
+			drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i));
+			panthor_vm_release_as_locked(vm);
+		}
+	}
+	mutex_unlock(&ptdev->mmu->as.slots_lock);
+}
+
+static void panthor_mmu_release_wq(struct drm_device *ddev, void *res)
+{
+	destroy_workqueue(res);
+}
+
+/**
+ * panthor_mmu_init() - Initialize the MMU logic.
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_mmu_init(struct panthor_device *ptdev)
+{
+	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
+	struct panthor_mmu *mmu;
+	int ret, irq;
+
+	mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL);
+	if (!mmu)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&mmu->as.lru_list);
+
+	ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&mmu->vm.list);
+	ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock);
+	if (ret)
+		return ret;
+
+	ptdev->mmu = mmu;
+
+	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu");
+	if (irq <= 0)
+		return -ENODEV;
+
+	ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq,
+				      panthor_mmu_fault_mask(ptdev, ~0));
+	if (ret)
+		return ret;
+
+	mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0);
+	if (!mmu->vm.wq)
+		return -ENOMEM;
+
+	/* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction,
+	 * which passes iova as an unsigned long. Patch the mmu_features to reflect this
+	 * limitation.
+	 */
+	if (sizeof(unsigned long) * 8 < va_bits) {
+		ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0);
+		ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8;
+	}
+
+	return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq);
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m)
+{
+	int ret;
+
+	mutex_lock(&vm->op_lock);
+	ret = drm_debugfs_gpuva_info(m, &vm->base);
+	mutex_unlock(&vm->op_lock);
+
+	return ret;
+}
+
+static int show_each_vm(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *ddev = node->minor->dev;
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+	int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data;
+	struct panthor_vm *vm;
+	int ret = 0;
+
+	mutex_lock(&ptdev->mmu->vm.lock);
+	list_for_each_entry(vm, &ptdev->mmu->vm.list, node) {
+		ret = show(vm, m);
+		if (ret < 0)
+			break;
+
+		seq_puts(m, "\n");
+	}
+	mutex_unlock(&ptdev->mmu->vm.lock);
+
+	return ret;
+}
+
+static struct drm_info_list panthor_mmu_debugfs_list[] = {
+	DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas),
+};
+
+/**
+ * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries
+ * @minor: Minor.
+ */
+void panthor_mmu_debugfs_init(struct drm_minor *minor)
+{
+	drm_debugfs_create_files(panthor_mmu_debugfs_list,
+				 ARRAY_SIZE(panthor_mmu_debugfs_list),
+				 minor->debugfs_root, minor);
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * panthor_mmu_pt_cache_init() - Initialize the page table cache.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_mmu_pt_cache_init(void)
+{
+	pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL);
+	if (!pt_cache)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ * panthor_mmu_pt_cache_fini() - Destroy the page table cache.
+ */
+void panthor_mmu_pt_cache_fini(void)
+{
+	kmem_cache_destroy(pt_cache);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h
new file mode 100644
index 000000000000..f3c1ed19f973
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_mmu.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_MMU_H__
+#define __PANTHOR_MMU_H__
+
+#include <linux/dma-resv.h>
+
+struct drm_exec;
+struct drm_sched_job;
+struct panthor_gem_object;
+struct panthor_heap_pool;
+struct panthor_vm;
+struct panthor_vma;
+struct panthor_mmu;
+
+int panthor_mmu_init(struct panthor_device *ptdev);
+void panthor_mmu_unplug(struct panthor_device *ptdev);
+void panthor_mmu_pre_reset(struct panthor_device *ptdev);
+void panthor_mmu_post_reset(struct panthor_device *ptdev);
+void panthor_mmu_suspend(struct panthor_device *ptdev);
+void panthor_mmu_resume(struct panthor_device *ptdev);
+
+int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo,
+			    u64 offset, u64 size, u64 va, u32 flags);
+int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size);
+struct panthor_gem_object *
+panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset);
+
+int panthor_vm_active(struct panthor_vm *vm);
+void panthor_vm_idle(struct panthor_vm *vm);
+int panthor_vm_as(struct panthor_vm *vm);
+
+struct panthor_heap_pool *
+panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create);
+
+struct panthor_vm *panthor_vm_get(struct panthor_vm *vm);
+void panthor_vm_put(struct panthor_vm *vm);
+struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu,
+				     u64 kernel_va_start, u64 kernel_va_size,
+				     u64 kernel_auto_va_start,
+				     u64 kernel_auto_va_size);
+
+int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec,
+					struct panthor_vm *vm,
+					u32 slot_count);
+int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm,
+					 struct drm_sched_job *job);
+void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm,
+					   struct drm_sched_job *job);
+
+struct dma_resv *panthor_vm_resv(struct panthor_vm *vm);
+struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm);
+
+void panthor_vm_pool_destroy(struct panthor_file *pfile);
+int panthor_vm_pool_create(struct panthor_file *pfile);
+int panthor_vm_pool_create_vm(struct panthor_device *ptdev,
+			      struct panthor_vm_pool *pool,
+			      struct drm_panthor_vm_create *args);
+int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle);
+struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle);
+
+bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm);
+bool panthor_vm_is_unusable(struct panthor_vm *vm);
+
+/*
+ * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM
+ * logic to auto-allocate the virtual address in the reserved kernel VA range.
+ */
+#define PANTHOR_VM_KERNEL_AUTO_VA		~0ull
+
+int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
+			struct drm_mm_node *va_node);
+void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node);
+
+int panthor_vm_bind_exec_sync_op(struct drm_file *file,
+				 struct panthor_vm *vm,
+				 struct drm_panthor_vm_bind_op *op);
+
+struct drm_sched_job *
+panthor_vm_bind_job_create(struct drm_file *file,
+			   struct panthor_vm *vm,
+			   const struct drm_panthor_vm_bind_op *op);
+void panthor_vm_bind_job_put(struct drm_sched_job *job);
+int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec,
+				      struct drm_sched_job *job);
+void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job);
+
+void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec,
+			     struct dma_fence *fence,
+			     enum dma_resv_usage private_usage,
+			     enum dma_resv_usage extobj_usage);
+
+int panthor_mmu_pt_cache_init(void);
+void panthor_mmu_pt_cache_fini(void);
+
+#ifdef CONFIG_DEBUG_FS
+void panthor_mmu_debugfs_init(struct drm_minor *minor);
+#endif
+
+#endif
--
2.42.0


From 8a12a05f837c17bd0653b1906b08dcfa5ee15962 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:22 +0100
Subject: [PATCH 09/69] [MERGED] drm/panthor: Add the FW logical block

Contains everything that's FW related, that includes the code dealing
with the microcontroller unit (MCU) that's running the FW, and anything
related to allocating memory shared between the FW and the CPU.

A few global FW events are processed in the IRQ handler, the rest is
forwarded to the scheduler, since scheduling is the primary reason for
the FW existence, and also the main source of FW <-> kernel
interactions.

v6:
- Add Maxime's and Heiko's acks
- Keep header inclusion alphabetically ordered

v5:
- Fix typo in GLB_PERFCNT_SAMPLE definition
- Fix unbalanced panthor_vm_idle/active() calls
- Fallback to a slow reset when the fast reset fails
- Add extra information when reporting a FW boot failure

v4:
- Add a MODULE_FIRMWARE() entry for gen 10.8
- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry()
- Fix typos
- Add Steve's R-b

v3:
- Make the FW path more future-proof (Liviu)
- Use one waitqueue for all FW events
- Simplify propagation of FW events to the scheduler logic
- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead
- Account for the panthor_vm changes
- Replace magic number with 0x7fffffff with ~0 to better signify that
  it's the maximum permitted value.
- More accurate rounding when computing the firmware timeout.
- Add a 'sub iterator' helper function. This also adds a check that a
  firmware entry doesn't overflow the firmware image.
- Drop __packed from FW structures, natural alignment is good enough.
- Other minor code improvements.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-9-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_fw.c | 1362 ++++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_fw.h |  503 ++++++++++
 2 files changed, 1865 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c
new file mode 100644
index 000000000000..33c87a59834e
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_fw.c
@@ -0,0 +1,1362 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2023 Collabora ltd. */
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+#include <asm/arch_timer.h>
+#endif
+
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/iopoll.h>
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include "panthor_device.h"
+#include "panthor_fw.h"
+#include "panthor_gem.h"
+#include "panthor_gpu.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+#include "panthor_sched.h"
+
+#define CSF_FW_NAME "mali_csffw.bin"
+
+#define PING_INTERVAL_MS			12000
+#define PROGRESS_TIMEOUT_CYCLES			(5ull * 500 * 1024 * 1024)
+#define PROGRESS_TIMEOUT_SCALE_SHIFT		10
+#define IDLE_HYSTERESIS_US			800
+#define PWROFF_HYSTERESIS_US			10000
+
+/**
+ * struct panthor_fw_binary_hdr - Firmware binary header.
+ */
+struct panthor_fw_binary_hdr {
+	/** @magic: Magic value to check binary validity. */
+	u32 magic;
+#define CSF_FW_BINARY_HEADER_MAGIC		0xc3f13a6e
+
+	/** @minor: Minor FW version. */
+	u8 minor;
+
+	/** @major: Major FW version. */
+	u8 major;
+#define CSF_FW_BINARY_HEADER_MAJOR_MAX		0
+
+	/** @padding1: MBZ. */
+	u16 padding1;
+
+	/** @version_hash: FW version hash. */
+	u32 version_hash;
+
+	/** @padding2: MBZ. */
+	u32 padding2;
+
+	/** @size: FW binary size. */
+	u32 size;
+};
+
+/**
+ * enum panthor_fw_binary_entry_type - Firmware binary entry type
+ */
+enum panthor_fw_binary_entry_type {
+	/** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */
+	CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0,
+
+	/** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */
+	CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1,
+
+	/** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */
+	CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2,
+
+	/** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */
+	CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3,
+
+	/** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */
+	CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4,
+};
+
+#define CSF_FW_BINARY_ENTRY_TYPE(ehdr)					((ehdr) & 0xff)
+#define CSF_FW_BINARY_ENTRY_SIZE(ehdr)					(((ehdr) >> 8) & 0xff)
+#define CSF_FW_BINARY_ENTRY_UPDATE					BIT(30)
+#define CSF_FW_BINARY_ENTRY_OPTIONAL					BIT(31)
+
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD					BIT(0)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR					BIT(1)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX					BIT(2)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE			(0 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED			(1 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT	(2 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT		(3 << 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK			GENMASK(4, 3)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT				BIT(5)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED				BIT(30)
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO				BIT(31)
+
+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS			\
+	(CSF_FW_BINARY_IFACE_ENTRY_RD_RD |				\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_WR |				\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_EX |				\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK |			\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_PROT |				\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED  |				\
+	 CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)
+
+/**
+ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary
+ */
+struct panthor_fw_binary_section_entry_hdr {
+	/** @flags: Section flags. */
+	u32 flags;
+
+	/** @va: MCU virtual range to map this binary section to. */
+	struct {
+		/** @start: Start address. */
+		u32 start;
+
+		/** @end: End address. */
+		u32 end;
+	} va;
+
+	/** @data: Data to initialize the FW section with. */
+	struct {
+		/** @start: Start offset in the FW binary. */
+		u32 start;
+
+		/** @end: End offset in the FW binary. */
+		u32 end;
+	} data;
+};
+
+/**
+ * struct panthor_fw_binary_iter - Firmware binary iterator
+ *
+ * Used to parse a firmware binary.
+ */
+struct panthor_fw_binary_iter {
+	/** @data: FW binary data. */
+	const void *data;
+
+	/** @size: FW binary size. */
+	size_t size;
+
+	/** @offset: Iterator offset. */
+	size_t offset;
+};
+
+/**
+ * struct panthor_fw_section - FW section
+ */
+struct panthor_fw_section {
+	/** @node: Used to keep track of FW sections. */
+	struct list_head node;
+
+	/** @flags: Section flags, as encoded in the FW binary. */
+	u32 flags;
+
+	/** @mem: Section memory. */
+	struct panthor_kernel_bo *mem;
+
+	/**
+	 * @name: Name of the section, as specified in the binary.
+	 *
+	 * Can be NULL.
+	 */
+	const char *name;
+
+	/**
+	 * @data: Initial data copied to the FW memory.
+	 *
+	 * We keep data around so we can reload sections after a reset.
+	 */
+	struct {
+		/** @buf: Buffed used to store init data. */
+		const void *buf;
+
+		/** @size: Size of @buf in bytes. */
+		size_t size;
+	} data;
+};
+
+#define CSF_MCU_SHARED_REGION_START		0x04000000ULL
+#define CSF_MCU_SHARED_REGION_SIZE		0x04000000ULL
+
+#define MIN_CS_PER_CSG				8
+#define MIN_CSGS				3
+#define MAX_CSG_PRIO				0xf
+
+#define CSF_IFACE_VERSION(major, minor, patch)	\
+	(((major) << 24) | ((minor) << 16) | (patch))
+#define CSF_IFACE_VERSION_MAJOR(v)		((v) >> 24)
+#define CSF_IFACE_VERSION_MINOR(v)		(((v) >> 16) & 0xff)
+#define CSF_IFACE_VERSION_PATCH(v)		((v) & 0xffff)
+
+#define CSF_GROUP_CONTROL_OFFSET		0x1000
+#define CSF_STREAM_CONTROL_OFFSET		0x40
+#define CSF_UNPRESERVED_REG_COUNT		4
+
+/**
+ * struct panthor_fw_iface - FW interfaces
+ */
+struct panthor_fw_iface {
+	/** @global: Global interface. */
+	struct panthor_fw_global_iface global;
+
+	/** @groups: Group slot interfaces. */
+	struct panthor_fw_csg_iface groups[MAX_CSGS];
+
+	/** @streams: Command stream slot interfaces. */
+	struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG];
+};
+
+/**
+ * struct panthor_fw - Firmware management
+ */
+struct panthor_fw {
+	/** @vm: MCU VM. */
+	struct panthor_vm *vm;
+
+	/** @sections: List of FW sections. */
+	struct list_head sections;
+
+	/** @shared_section: The section containing the FW interfaces. */
+	struct panthor_fw_section *shared_section;
+
+	/** @iface: FW interfaces. */
+	struct panthor_fw_iface iface;
+
+	/** @watchdog: Collection of fields relating to the FW watchdog. */
+	struct {
+		/** @ping_work: Delayed work used to ping the FW. */
+		struct delayed_work ping_work;
+	} watchdog;
+
+	/**
+	 * @req_waitqueue: FW request waitqueue.
+	 *
+	 * Everytime a request is sent to a command stream group or the global
+	 * interface, the caller will first busy wait for the request to be
+	 * acknowledged, and then fallback to a sleeping wait.
+	 *
+	 * This wait queue is here to support the sleeping wait flavor.
+	 */
+	wait_queue_head_t req_waitqueue;
+
+	/** @booted: True is the FW is booted */
+	bool booted;
+
+	/**
+	 * @fast_reset: True if the post_reset logic can proceed with a fast reset.
+	 *
+	 * A fast reset is just a reset where the driver doesn't reload the FW sections.
+	 *
+	 * Any time the firmware is properly suspended, a fast reset can take place.
+	 * On the other hand, if the halt operation failed, the driver will reload
+	 * all sections to make sure we start from a fresh state.
+	 */
+	bool fast_reset;
+
+	/** @irq: Job irq data. */
+	struct panthor_irq irq;
+};
+
+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev)
+{
+	return ptdev->fw->vm;
+}
+
+/**
+ * panthor_fw_get_glb_iface() - Get the global interface
+ * @ptdev: Device.
+ *
+ * Return: The global interface.
+ */
+struct panthor_fw_global_iface *
+panthor_fw_get_glb_iface(struct panthor_device *ptdev)
+{
+	return &ptdev->fw->iface.global;
+}
+
+/**
+ * panthor_fw_get_csg_iface() - Get a command stream group slot interface
+ * @ptdev: Device.
+ * @csg_slot: Index of the command stream group slot.
+ *
+ * Return: The command stream group slot interface.
+ */
+struct panthor_fw_csg_iface *
+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot)
+{
+	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS))
+		return NULL;
+
+	return &ptdev->fw->iface.groups[csg_slot];
+}
+
+/**
+ * panthor_fw_get_cs_iface() - Get a command stream slot interface
+ * @ptdev: Device.
+ * @csg_slot: Index of the command stream group slot.
+ * @cs_slot: Index of the command stream slot.
+ *
+ * Return: The command stream slot interface.
+ */
+struct panthor_fw_cs_iface *
+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot)
+{
+	if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG))
+		return NULL;
+
+	return &ptdev->fw->iface.streams[csg_slot][cs_slot];
+}
+
+/**
+ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count
+ * @ptdev: Device.
+ * @timeout_us: Timeout expressed in micro-seconds.
+ *
+ * The FW has two timer sources: the GPU counter or arch-timer. We need
+ * to express timeouts in term of number of cycles and specify which
+ * timer source should be used.
+ *
+ * Return: A value suitable for timeout fields in the global interface.
+ */
+static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us)
+{
+	bool use_cycle_counter = false;
+	u32 timer_rate = 0;
+	u64 mod_cycles;
+
+#ifdef CONFIG_ARM_ARCH_TIMER
+	timer_rate = arch_timer_get_cntfrq();
+#endif
+
+	if (!timer_rate) {
+		use_cycle_counter = true;
+		timer_rate = clk_get_rate(ptdev->clks.core);
+	}
+
+	if (drm_WARN_ON(&ptdev->base, !timer_rate)) {
+		/* We couldn't get a valid clock rate, let's just pick the
+		 * maximum value so the FW still handles the core
+		 * power on/off requests.
+		 */
+		return GLB_TIMER_VAL(~0) |
+		       GLB_TIMER_SOURCE_GPU_COUNTER;
+	}
+
+	mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate,
+				      1000000ull << 10);
+	if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0)))
+		mod_cycles = GLB_TIMER_VAL(~0);
+
+	return GLB_TIMER_VAL(mod_cycles) |
+	       (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0);
+}
+
+static int panthor_fw_binary_iter_read(struct panthor_device *ptdev,
+				       struct panthor_fw_binary_iter *iter,
+				       void *out, size_t size)
+{
+	size_t new_offset = iter->offset + size;
+
+	if (new_offset > iter->size || new_offset < iter->offset) {
+		drm_err(&ptdev->base, "Firmware too small\n");
+		return -EINVAL;
+	}
+
+	memcpy(out, iter->data + iter->offset, size);
+	iter->offset = new_offset;
+	return 0;
+}
+
+static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev,
+					   struct panthor_fw_binary_iter *iter,
+					   struct panthor_fw_binary_iter *sub_iter,
+					   size_t size)
+{
+	size_t new_offset = iter->offset + size;
+
+	if (new_offset > iter->size || new_offset < iter->offset) {
+		drm_err(&ptdev->base, "Firmware entry too long\n");
+		return -EINVAL;
+	}
+
+	sub_iter->offset = 0;
+	sub_iter->data = iter->data + iter->offset;
+	sub_iter->size = size;
+	iter->offset = new_offset;
+	return 0;
+}
+
+static void panthor_fw_init_section_mem(struct panthor_device *ptdev,
+					struct panthor_fw_section *section)
+{
+	bool was_mapped = !!section->mem->kmap;
+	int ret;
+
+	if (!section->data.size &&
+	    !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO))
+		return;
+
+	ret = panthor_kernel_bo_vmap(section->mem);
+	if (drm_WARN_ON(&ptdev->base, ret))
+		return;
+
+	memcpy(section->mem->kmap, section->data.buf, section->data.size);
+	if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) {
+		memset(section->mem->kmap + section->data.size, 0,
+		       panthor_kernel_bo_size(section->mem) - section->data.size);
+	}
+
+	if (!was_mapped)
+		panthor_kernel_bo_vunmap(section->mem);
+}
+
+/**
+ * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces.
+ * @ptdev: Device.
+ * @input: Pointer holding the input interface on success.
+ * Should be ignored on failure.
+ * @output: Pointer holding the output interface on success.
+ * Should be ignored on failure.
+ * @input_fw_va: Pointer holding the input interface FW VA on success.
+ * Should be ignored on failure.
+ * @output_fw_va: Pointer holding the output interface FW VA on success.
+ * Should be ignored on failure.
+ *
+ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input
+ * interface is at offset 0, and the output interface at offset 4096.
+ *
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
+ */
+struct panthor_kernel_bo *
+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
+				 struct panthor_fw_ringbuf_input_iface **input,
+				 const struct panthor_fw_ringbuf_output_iface **output,
+				 u32 *input_fw_va, u32 *output_fw_va)
+{
+	struct panthor_kernel_bo *mem;
+	int ret;
+
+	mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K,
+				       DRM_PANTHOR_BO_NO_MMAP,
+				       DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+				       DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+				       PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(mem))
+		return mem;
+
+	ret = panthor_kernel_bo_vmap(mem);
+	if (ret) {
+		panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem);
+		return ERR_PTR(ret);
+	}
+
+	memset(mem->kmap, 0, panthor_kernel_bo_size(mem));
+	*input = mem->kmap;
+	*output = mem->kmap + SZ_4K;
+	*input_fw_va = panthor_kernel_bo_gpuva(mem);
+	*output_fw_va = *input_fw_va + SZ_4K;
+
+	return mem;
+}
+
+/**
+ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group.
+ * @ptdev: Device.
+ * @size: Size of the suspend buffer.
+ *
+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise.
+ */
+struct panthor_kernel_bo *
+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size)
+{
+	return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size,
+					DRM_PANTHOR_BO_NO_MMAP,
+					DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+					PANTHOR_VM_KERNEL_AUTO_VA);
+}
+
+static int panthor_fw_load_section_entry(struct panthor_device *ptdev,
+					 const struct firmware *fw,
+					 struct panthor_fw_binary_iter *iter,
+					 u32 ehdr)
+{
+	struct panthor_fw_binary_section_entry_hdr hdr;
+	struct panthor_fw_section *section;
+	u32 section_size;
+	u32 name_len;
+	int ret;
+
+	ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr));
+	if (ret)
+		return ret;
+
+	if (hdr.data.end < hdr.data.start) {
+		drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n",
+			hdr.data.end, hdr.data.start);
+		return -EINVAL;
+	}
+
+	if (hdr.va.end < hdr.va.start) {
+		drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n",
+			hdr.va.end, hdr.va.start);
+		return -EINVAL;
+	}
+
+	if (hdr.data.end > fw->size) {
+		drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n",
+			hdr.data.end, fw->size);
+		return -EINVAL;
+	}
+
+	if ((hdr.va.start & ~PAGE_MASK) != 0 ||
+	    (hdr.va.end & ~PAGE_MASK) != 0) {
+		drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n",
+			hdr.va.start, hdr.va.end);
+		return -EINVAL;
+	}
+
+	if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) {
+		drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n",
+			hdr.flags);
+		return -EINVAL;
+	}
+
+	if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) {
+		drm_warn(&ptdev->base,
+			 "Firmware protected mode entry not be supported, ignoring");
+		return 0;
+	}
+
+	if (hdr.va.start == CSF_MCU_SHARED_REGION_START &&
+	    !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) {
+		drm_err(&ptdev->base,
+			"Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START);
+		return -EINVAL;
+	}
+
+	name_len = iter->size - iter->offset;
+
+	section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL);
+	if (!section)
+		return -ENOMEM;
+
+	list_add_tail(&section->node, &ptdev->fw->sections);
+	section->flags = hdr.flags;
+	section->data.size = hdr.data.end - hdr.data.start;
+
+	if (section->data.size > 0) {
+		void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL);
+
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data, fw->data + hdr.data.start, section->data.size);
+		section->data.buf = data;
+	}
+
+	if (name_len > 0) {
+		char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL);
+
+		if (!name)
+			return -ENOMEM;
+
+		memcpy(name, iter->data + iter->offset, name_len);
+		name[name_len] = '\0';
+		section->name = name;
+	}
+
+	section_size = hdr.va.end - hdr.va.start;
+	if (section_size) {
+		u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK;
+		struct panthor_gem_object *bo;
+		u32 vm_map_flags = 0;
+		struct sg_table *sgt;
+		u64 va = hdr.va.start;
+
+		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
+			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY;
+
+		if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX))
+			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC;
+
+		/* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to
+		 * non-cacheable for now. We might want to introduce a new
+		 * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device
+		 * memory and is currently not used by our driver) for
+		 * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit
+		 * of IO-coherent systems.
+		 */
+		if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED)
+			vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED;
+
+		section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev),
+							section_size,
+							DRM_PANTHOR_BO_NO_MMAP,
+							vm_map_flags, va);
+		if (IS_ERR(section->mem))
+			return PTR_ERR(section->mem);
+
+		if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start))
+			return -EINVAL;
+
+		if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) {
+			ret = panthor_kernel_bo_vmap(section->mem);
+			if (ret)
+				return ret;
+		}
+
+		panthor_fw_init_section_mem(ptdev, section);
+
+		bo = to_panthor_bo(section->mem->obj);
+		sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+		if (IS_ERR(sgt))
+			return PTR_ERR(sgt);
+
+		dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
+	}
+
+	if (hdr.va.start == CSF_MCU_SHARED_REGION_START)
+		ptdev->fw->shared_section = section;
+
+	return 0;
+}
+
+static void
+panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload)
+{
+	struct panthor_fw_section *section;
+
+	list_for_each_entry(section, &ptdev->fw->sections, node) {
+		struct sg_table *sgt;
+
+		if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR))
+			continue;
+
+		panthor_fw_init_section_mem(ptdev, section);
+		sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base);
+		if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt)))
+			dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE);
+	}
+}
+
+static int panthor_fw_load_entry(struct panthor_device *ptdev,
+				 const struct firmware *fw,
+				 struct panthor_fw_binary_iter *iter)
+{
+	struct panthor_fw_binary_iter eiter;
+	u32 ehdr;
+	int ret;
+
+	ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr));
+	if (ret)
+		return ret;
+
+	if ((iter->offset % sizeof(u32)) ||
+	    (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) {
+		drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n",
+			(u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr));
+		return -EINVAL;
+	}
+
+	if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter,
+					    CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr)))
+		return -EINVAL;
+
+	switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) {
+	case CSF_FW_BINARY_ENTRY_TYPE_IFACE:
+		return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr);
+
+	/* FIXME: handle those entry types? */
+	case CSF_FW_BINARY_ENTRY_TYPE_CONFIG:
+	case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST:
+	case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER:
+	case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA:
+		return 0;
+	default:
+		break;
+	}
+
+	if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL)
+		return 0;
+
+	drm_err(&ptdev->base,
+		"Unsupported non-optional entry type %u in firmware\n",
+		CSF_FW_BINARY_ENTRY_TYPE(ehdr));
+	return -EINVAL;
+}
+
+static int panthor_fw_load(struct panthor_device *ptdev)
+{
+	const struct firmware *fw = NULL;
+	struct panthor_fw_binary_iter iter = {};
+	struct panthor_fw_binary_hdr hdr;
+	char fw_path[128];
+	int ret;
+
+	snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s",
+		 (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id),
+		 (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id),
+		 CSF_FW_NAME);
+
+	ret = request_firmware(&fw, fw_path, ptdev->base.dev);
+	if (ret) {
+		drm_err(&ptdev->base, "Failed to load firmware image '%s'\n",
+			CSF_FW_NAME);
+		return ret;
+	}
+
+	iter.data = fw->data;
+	iter.size = fw->size;
+	ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr));
+	if (ret)
+		goto out;
+
+	if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) {
+		ret = -EINVAL;
+		drm_err(&ptdev->base, "Invalid firmware magic\n");
+		goto out;
+	}
+
+	if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) {
+		ret = -EINVAL;
+		drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n",
+			hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX);
+		goto out;
+	}
+
+	if (hdr.size > iter.size) {
+		drm_err(&ptdev->base, "Firmware image is truncated\n");
+		goto out;
+	}
+
+	iter.size = hdr.size;
+
+	while (iter.offset < hdr.size) {
+		ret = panthor_fw_load_entry(ptdev, fw, &iter);
+		if (ret)
+			goto out;
+	}
+
+	if (!ptdev->fw->shared_section) {
+		drm_err(&ptdev->base, "Shared interface region not found\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+out:
+	release_firmware(fw);
+	return ret;
+}
+
+/**
+ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address
+ * @ptdev: Device.
+ * @mcu_va: MCU address.
+ *
+ * Return: NULL if the address is not part of the shared section, non-NULL otherwise.
+ */
+static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va)
+{
+	u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem);
+	u64 shared_mem_end = shared_mem_start +
+			     panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
+	if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end)
+		return NULL;
+
+	return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start);
+}
+
+static int panthor_init_cs_iface(struct panthor_device *ptdev,
+				 unsigned int csg_idx, unsigned int cs_idx)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx);
+	struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx];
+	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
+	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET +
+			   (csg_idx * glb_iface->control->group_stride) +
+			   CSF_STREAM_CONTROL_OFFSET +
+			   (cs_idx * csg_iface->control->stream_stride);
+	struct panthor_fw_cs_iface *first_cs_iface =
+		panthor_fw_get_cs_iface(ptdev, 0, 0);
+
+	if (iface_offset + sizeof(*cs_iface) >= shared_section_sz)
+		return -EINVAL;
+
+	spin_lock_init(&cs_iface->lock);
+	cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
+	cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va);
+	cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va);
+
+	if (!cs_iface->input || !cs_iface->output) {
+		drm_err(&ptdev->base, "Invalid stream control interface input/output VA");
+		return -EINVAL;
+	}
+
+	if (cs_iface != first_cs_iface) {
+		if (cs_iface->control->features != first_cs_iface->control->features) {
+			drm_err(&ptdev->base, "Expecting identical CS slots");
+			return -EINVAL;
+		}
+	} else {
+		u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features);
+
+		ptdev->csif_info.cs_reg_count = reg_count;
+		ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT;
+	}
+
+	return 0;
+}
+
+static bool compare_csg(const struct panthor_fw_csg_control_iface *a,
+			const struct panthor_fw_csg_control_iface *b)
+{
+	if (a->features != b->features)
+		return false;
+	if (a->suspend_size != b->suspend_size)
+		return false;
+	if (a->protm_suspend_size != b->protm_suspend_size)
+		return false;
+	if (a->stream_num != b->stream_num)
+		return false;
+	return true;
+}
+
+static int panthor_init_csg_iface(struct panthor_device *ptdev,
+				  unsigned int csg_idx)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx];
+	u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem);
+	u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride);
+	unsigned int i;
+
+	if (iface_offset + sizeof(*csg_iface) >= shared_section_sz)
+		return -EINVAL;
+
+	spin_lock_init(&csg_iface->lock);
+	csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset;
+	csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va);
+	csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va);
+
+	if (csg_iface->control->stream_num < MIN_CS_PER_CSG ||
+	    csg_iface->control->stream_num > MAX_CS_PER_CSG)
+		return -EINVAL;
+
+	if (!csg_iface->input || !csg_iface->output) {
+		drm_err(&ptdev->base, "Invalid group control interface input/output VA");
+		return -EINVAL;
+	}
+
+	if (csg_idx > 0) {
+		struct panthor_fw_csg_iface *first_csg_iface =
+			panthor_fw_get_csg_iface(ptdev, 0);
+
+		if (!compare_csg(first_csg_iface->control, csg_iface->control)) {
+			drm_err(&ptdev->base, "Expecting identical CSG slots");
+			return -EINVAL;
+		}
+	}
+
+	for (i = 0; i < csg_iface->control->stream_num; i++) {
+		int ret = panthor_init_cs_iface(ptdev, csg_idx, i);
+
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static u32 panthor_get_instr_features(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+	if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0))
+		return 0;
+
+	return glb_iface->control->instr_features;
+}
+
+static int panthor_fw_init_ifaces(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global;
+	unsigned int i;
+
+	if (!ptdev->fw->shared_section->mem->kmap)
+		return -EINVAL;
+
+	spin_lock_init(&glb_iface->lock);
+	glb_iface->control = ptdev->fw->shared_section->mem->kmap;
+
+	if (!glb_iface->control->version) {
+		drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot");
+		return -EINVAL;
+	}
+
+	glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va);
+	glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va);
+	if (!glb_iface->input || !glb_iface->output) {
+		drm_err(&ptdev->base, "Invalid global control interface input/output VA");
+		return -EINVAL;
+	}
+
+	if (glb_iface->control->group_num > MAX_CSGS ||
+	    glb_iface->control->group_num < MIN_CSGS) {
+		drm_err(&ptdev->base, "Invalid number of control groups");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < glb_iface->control->group_num; i++) {
+		int ret = panthor_init_csg_iface(ptdev, i);
+
+		if (ret)
+			return ret;
+	}
+
+	drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x",
+		 CSF_IFACE_VERSION_MAJOR(glb_iface->control->version),
+		 CSF_IFACE_VERSION_MINOR(glb_iface->control->version),
+		 CSF_IFACE_VERSION_PATCH(glb_iface->control->version),
+		 glb_iface->control->features,
+		 panthor_get_instr_features(ptdev));
+	return 0;
+}
+
+static void panthor_fw_init_global_iface(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+	/* Enable all cores. */
+	glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present;
+
+	/* Setup timers. */
+	glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US);
+	glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT;
+	glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US);
+
+	/* Enable interrupts we care about. */
+	glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN |
+					 GLB_PING |
+					 GLB_CFG_PROGRESS_TIMER |
+					 GLB_CFG_POWEROFF_TIMER |
+					 GLB_IDLE_EN |
+					 GLB_IDLE;
+
+	panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN);
+	panthor_fw_toggle_reqs(glb_iface, req, ack,
+			       GLB_CFG_ALLOC_EN |
+			       GLB_CFG_POWEROFF_TIMER |
+			       GLB_CFG_PROGRESS_TIMER);
+
+	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
+
+	/* Kick the watchdog. */
+	mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work,
+			 msecs_to_jiffies(PING_INTERVAL_MS));
+}
+
+static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status)
+{
+	if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF))
+		ptdev->fw->booted = true;
+
+	wake_up_all(&ptdev->fw->req_waitqueue);
+
+	/* If the FW is not booted, don't process IRQs, just flag the FW as booted. */
+	if (!ptdev->fw->booted)
+		return;
+
+	panthor_sched_report_fw_events(ptdev, status);
+}
+PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler);
+
+static int panthor_fw_start(struct panthor_device *ptdev)
+{
+	bool timedout = false;
+
+	ptdev->fw->booted = false;
+	panthor_job_irq_resume(&ptdev->fw->irq, ~0);
+	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO);
+
+	if (!wait_event_timeout(ptdev->fw->req_waitqueue,
+				ptdev->fw->booted,
+				msecs_to_jiffies(1000))) {
+		if (!ptdev->fw->booted &&
+		    !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF))
+			timedout = true;
+	}
+
+	if (timedout) {
+		static const char * const status_str[] = {
+			[MCU_STATUS_DISABLED] = "disabled",
+			[MCU_STATUS_ENABLED] = "enabled",
+			[MCU_STATUS_HALT] = "halt",
+			[MCU_STATUS_FATAL] = "fatal",
+		};
+		u32 status = gpu_read(ptdev, MCU_STATUS);
+
+		drm_err(&ptdev->base, "Failed to boot MCU (status=%s)",
+			status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void panthor_fw_stop(struct panthor_device *ptdev)
+{
+	u32 status;
+
+	gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
+	if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
+			       status == MCU_STATUS_DISABLED, 10, 100000))
+		drm_err(&ptdev->base, "Failed to stop MCU");
+}
+
+/**
+ * panthor_fw_pre_reset() - Call before a reset.
+ * @ptdev: Device.
+ * @on_hang: true if the reset was triggered on a GPU hang.
+ *
+ * If the reset is not triggered on a hang, we try to gracefully halt the
+ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called.
+ */
+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang)
+{
+	/* Make sure we won't be woken up by a ping. */
+	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
+
+	ptdev->fw->fast_reset = false;
+
+	if (!on_hang) {
+		struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+		u32 status;
+
+		panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT);
+		gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
+		if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status,
+					status == MCU_STATUS_HALT, 10, 100000) &&
+		    glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) {
+			ptdev->fw->fast_reset = true;
+		} else {
+			drm_warn(&ptdev->base, "Failed to cleanly suspend MCU");
+		}
+
+		/* The FW detects 0 -> 1 transitions. Make sure we reset
+		 * the HALT bit before the FW is rebooted.
+		 */
+		panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT);
+	}
+
+	panthor_job_irq_suspend(&ptdev->fw->irq);
+}
+
+/**
+ * panthor_fw_post_reset() - Call after a reset.
+ * @ptdev: Device.
+ *
+ * Start the FW. If this is not a fast reset, all FW sections are reloaded to
+ * make sure we can recover from a memory corruption.
+ */
+int panthor_fw_post_reset(struct panthor_device *ptdev)
+{
+	int ret;
+
+	/* Make the MCU VM active. */
+	ret = panthor_vm_active(ptdev->fw->vm);
+	if (ret)
+		return ret;
+
+	/* If this is a fast reset, try to start the MCU without reloading
+	 * the FW sections. If it fails, go for a full reset.
+	 */
+	if (ptdev->fw->fast_reset) {
+		ret = panthor_fw_start(ptdev);
+		if (!ret)
+			goto out;
+
+		/* Force a disable, so we get a fresh boot on the next
+		 * panthor_fw_start() call.
+		 */
+		gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE);
+		drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset");
+	}
+
+	/* Reload all sections, including RO ones. We're not supposed
+	 * to end up here anyway, let's just assume the overhead of
+	 * reloading everything is acceptable.
+	 */
+	panthor_reload_fw_sections(ptdev, true);
+
+	ret = panthor_fw_start(ptdev);
+	if (ret) {
+		drm_err(&ptdev->base, "FW slow reset failed");
+		return ret;
+	}
+
+out:
+	/* We must re-initialize the global interface even on fast-reset. */
+	panthor_fw_init_global_iface(ptdev);
+	return 0;
+}
+
+/**
+ * panthor_fw_unplug() - Called when the device is unplugged.
+ * @ptdev: Device.
+ *
+ * This function must make sure all pending operations are flushed before
+ * will release device resources, thus preventing any interaction with
+ * the HW.
+ *
+ * If there is still FW-related work running after this function returns,
+ * they must use drm_dev_{enter,exit}() and skip any HW access when
+ * drm_dev_enter() returns false.
+ */
+void panthor_fw_unplug(struct panthor_device *ptdev)
+{
+	struct panthor_fw_section *section;
+
+	cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work);
+
+	/* Make sure the IRQ handler can be called after that point. */
+	if (ptdev->fw->irq.irq)
+		panthor_job_irq_suspend(&ptdev->fw->irq);
+
+	panthor_fw_stop(ptdev);
+
+	list_for_each_entry(section, &ptdev->fw->sections, node)
+		panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem);
+
+	/* We intentionally don't call panthor_vm_idle() and let
+	 * panthor_mmu_unplug() release the AS we acquired with
+	 * panthor_vm_active() so we don't have to track the VM active/idle
+	 * state to keep the active_refcnt balanced.
+	 */
+	panthor_vm_put(ptdev->fw->vm);
+
+	panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000);
+}
+
+/**
+ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW.
+ * @req_ptr: Pointer to the req register.
+ * @ack_ptr: Pointer to the ack register.
+ * @wq: Wait queue to use for the sleeping wait.
+ * @req_mask: Mask of requests to wait for.
+ * @acked: Pointer to field that's updated with the acked requests.
+ * If the function returns 0, *acked == req_mask.
+ * @timeout_ms: Timeout expressed in milliseconds.
+ *
+ * Return: 0 on success, -ETIMEDOUT otherwise.
+ */
+static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr,
+				wait_queue_head_t *wq,
+				u32 req_mask, u32 *acked,
+				u32 timeout_ms)
+{
+	u32 ack, req = READ_ONCE(*req_ptr) & req_mask;
+	int ret;
+
+	/* Busy wait for a few µsecs before falling back to a sleeping wait. */
+	*acked = req_mask;
+	ret = read_poll_timeout_atomic(READ_ONCE, ack,
+				       (ack & req_mask) == req,
+				       0, 10, 0,
+				       *ack_ptr);
+	if (!ret)
+		return 0;
+
+	if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req,
+			       msecs_to_jiffies(timeout_ms)))
+		return 0;
+
+	/* Check one last time, in case we were not woken up for some reason. */
+	ack = READ_ONCE(*ack_ptr);
+	if ((ack & req_mask) == req)
+		return 0;
+
+	*acked = ~(req ^ ack) & req_mask;
+	return -ETIMEDOUT;
+}
+
+/**
+ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged.
+ * @ptdev: Device.
+ * @req_mask: Mask of requests to wait for.
+ * @acked: Pointer to field that's updated with the acked requests.
+ * If the function returns 0, *acked == req_mask.
+ * @timeout_ms: Timeout expressed in milliseconds.
+ *
+ * Return: 0 on success, -ETIMEDOUT otherwise.
+ */
+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev,
+			     u32 req_mask, u32 *acked,
+			     u32 timeout_ms)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+	/* GLB_HALT doesn't get acked through the FW interface. */
+	if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT)))
+		return -EINVAL;
+
+	return panthor_fw_wait_acks(&glb_iface->input->req,
+				    &glb_iface->output->ack,
+				    &ptdev->fw->req_waitqueue,
+				    req_mask, acked, timeout_ms);
+}
+
+/**
+ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged.
+ * @ptdev: Device.
+ * @csg_slot: CSG slot ID.
+ * @req_mask: Mask of requests to wait for.
+ * @acked: Pointer to field that's updated with the acked requests.
+ * If the function returns 0, *acked == req_mask.
+ * @timeout_ms: Timeout expressed in milliseconds.
+ *
+ * Return: 0 on success, -ETIMEDOUT otherwise.
+ */
+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot,
+			     u32 req_mask, u32 *acked, u32 timeout_ms)
+{
+	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot);
+	int ret;
+
+	if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK))
+		return -EINVAL;
+
+	ret = panthor_fw_wait_acks(&csg_iface->input->req,
+				   &csg_iface->output->ack,
+				   &ptdev->fw->req_waitqueue,
+				   req_mask, acked, timeout_ms);
+
+	/*
+	 * Check that all bits in the state field were updated, if any mismatch
+	 * then clear all bits in the state field. This allows code to do
+	 * (acked & CSG_STATE_MASK) and get the right value.
+	 */
+
+	if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK)
+		*acked &= ~CSG_STATE_MASK;
+
+	return ret;
+}
+
+/**
+ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells.
+ * @ptdev: Device.
+ * @csg_mask: Bitmask encoding the command stream group doorbells to ring.
+ *
+ * This function is toggling bits in the doorbell_req and ringing the
+ * global doorbell. It doesn't require a user doorbell to be attached to
+ * the group.
+ */
+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+	panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask);
+	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
+}
+
+static void panthor_fw_ping_work(struct work_struct *work)
+{
+	struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work);
+	struct panthor_device *ptdev = fw->irq.ptdev;
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	u32 acked;
+	int ret;
+
+	if (panthor_device_reset_is_pending(ptdev))
+		return;
+
+	panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING);
+	gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1);
+
+	ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100);
+	if (ret) {
+		panthor_device_schedule_reset(ptdev);
+		drm_err(&ptdev->base, "FW ping timeout, scheduling a reset");
+	} else {
+		mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work,
+				 msecs_to_jiffies(PING_INTERVAL_MS));
+	}
+}
+
+/**
+ * panthor_fw_init() - Initialize FW related data.
+ * @ptdev: Device.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+int panthor_fw_init(struct panthor_device *ptdev)
+{
+	struct panthor_fw *fw;
+	int ret, irq;
+
+	fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL);
+	if (!fw)
+		return -ENOMEM;
+
+	ptdev->fw = fw;
+	init_waitqueue_head(&fw->req_waitqueue);
+	INIT_LIST_HEAD(&fw->sections);
+	INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work);
+
+	irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job");
+	if (irq <= 0)
+		return -ENODEV;
+
+	ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0);
+	if (ret) {
+		drm_err(&ptdev->base, "failed to request job irq");
+		return ret;
+	}
+
+	ret = panthor_gpu_l2_power_on(ptdev);
+	if (ret)
+		return ret;
+
+	fw->vm = panthor_vm_create(ptdev, true,
+				   0, SZ_4G,
+				   CSF_MCU_SHARED_REGION_START,
+				   CSF_MCU_SHARED_REGION_SIZE);
+	if (IS_ERR(fw->vm)) {
+		ret = PTR_ERR(fw->vm);
+		fw->vm = NULL;
+		goto err_unplug_fw;
+	}
+
+	ret = panthor_fw_load(ptdev);
+	if (ret)
+		goto err_unplug_fw;
+
+	ret = panthor_vm_active(fw->vm);
+	if (ret)
+		goto err_unplug_fw;
+
+	ret = panthor_fw_start(ptdev);
+	if (ret)
+		goto err_unplug_fw;
+
+	ret = panthor_fw_init_ifaces(ptdev);
+	if (ret)
+		goto err_unplug_fw;
+
+	panthor_fw_init_global_iface(ptdev);
+	return 0;
+
+err_unplug_fw:
+	panthor_fw_unplug(ptdev);
+	return ret;
+}
+
+MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin");
diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h
new file mode 100644
index 000000000000..22448abde992
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_fw.h
@@ -0,0 +1,503 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_MCU_H__
+#define __PANTHOR_MCU_H__
+
+#include <linux/types.h>
+
+struct panthor_device;
+struct panthor_kernel_bo;
+
+#define MAX_CSGS				31
+#define MAX_CS_PER_CSG                          32
+
+struct panthor_fw_ringbuf_input_iface {
+	u64 insert;
+	u64 extract;
+};
+
+struct panthor_fw_ringbuf_output_iface {
+	u64 extract;
+	u32 active;
+};
+
+struct panthor_fw_cs_control_iface {
+#define CS_FEATURES_WORK_REGS(x)		(((x) & GENMASK(7, 0)) + 1)
+#define CS_FEATURES_SCOREBOARDS(x)		(((x) & GENMASK(15, 8)) >> 8)
+#define CS_FEATURES_COMPUTE			BIT(16)
+#define CS_FEATURES_FRAGMENT			BIT(17)
+#define CS_FEATURES_TILER			BIT(18)
+	u32 features;
+	u32 input_va;
+	u32 output_va;
+};
+
+struct panthor_fw_cs_input_iface {
+#define CS_STATE_MASK				GENMASK(2, 0)
+#define CS_STATE_STOP				0
+#define CS_STATE_START				1
+#define CS_EXTRACT_EVENT			BIT(4)
+#define CS_IDLE_SYNC_WAIT			BIT(8)
+#define CS_IDLE_PROTM_PENDING			BIT(9)
+#define CS_IDLE_EMPTY				BIT(10)
+#define CS_IDLE_RESOURCE_REQ			BIT(11)
+#define CS_TILER_OOM				BIT(26)
+#define CS_PROTM_PENDING			BIT(27)
+#define CS_FATAL				BIT(30)
+#define CS_FAULT				BIT(31)
+#define CS_REQ_MASK				(CS_STATE_MASK | \
+						 CS_EXTRACT_EVENT | \
+						 CS_IDLE_SYNC_WAIT | \
+						 CS_IDLE_PROTM_PENDING | \
+						 CS_IDLE_EMPTY | \
+						 CS_IDLE_RESOURCE_REQ)
+#define CS_EVT_MASK				(CS_TILER_OOM | \
+						 CS_PROTM_PENDING | \
+						 CS_FATAL | \
+						 CS_FAULT)
+	u32 req;
+
+#define CS_CONFIG_PRIORITY(x)			((x) & GENMASK(3, 0))
+#define CS_CONFIG_DOORBELL(x)			(((x) << 8) & GENMASK(15, 8))
+	u32 config;
+	u32 reserved1;
+	u32 ack_irq_mask;
+	u64 ringbuf_base;
+	u32 ringbuf_size;
+	u32 reserved2;
+	u64 heap_start;
+	u64 heap_end;
+	u64 ringbuf_input;
+	u64 ringbuf_output;
+	u32 instr_config;
+	u32 instrbuf_size;
+	u64 instrbuf_base;
+	u64 instrbuf_offset_ptr;
+};
+
+struct panthor_fw_cs_output_iface {
+	u32 ack;
+	u32 reserved1[15];
+	u64 status_cmd_ptr;
+
+#define CS_STATUS_WAIT_SB_MASK			GENMASK(15, 0)
+#define CS_STATUS_WAIT_SB_SRC_MASK		GENMASK(19, 16)
+#define CS_STATUS_WAIT_SB_SRC_NONE		(0 << 16)
+#define CS_STATUS_WAIT_SB_SRC_WAIT		(8 << 16)
+#define CS_STATUS_WAIT_SYNC_COND_LE		(0 << 24)
+#define CS_STATUS_WAIT_SYNC_COND_GT		(1 << 24)
+#define CS_STATUS_WAIT_SYNC_COND_MASK		GENMASK(27, 24)
+#define CS_STATUS_WAIT_PROGRESS			BIT(28)
+#define CS_STATUS_WAIT_PROTM			BIT(29)
+#define CS_STATUS_WAIT_SYNC_64B			BIT(30)
+#define CS_STATUS_WAIT_SYNC			BIT(31)
+	u32 status_wait;
+	u32 status_req_resource;
+	u64 status_wait_sync_ptr;
+	u32 status_wait_sync_value;
+	u32 status_scoreboards;
+
+#define CS_STATUS_BLOCKED_REASON_UNBLOCKED	0
+#define CS_STATUS_BLOCKED_REASON_SB_WAIT	1
+#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT	2
+#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT	3
+#define CS_STATUS_BLOCKED_REASON_DEFERRED	5
+#define CS_STATUS_BLOCKED_REASON_RES		6
+#define CS_STATUS_BLOCKED_REASON_FLUSH		7
+#define CS_STATUS_BLOCKED_REASON_MASK		GENMASK(3, 0)
+	u32 status_blocked_reason;
+	u32 status_wait_sync_value_hi;
+	u32 reserved2[6];
+
+#define CS_EXCEPTION_TYPE(x)			((x) & GENMASK(7, 0))
+#define CS_EXCEPTION_DATA(x)			(((x) >> 8) & GENMASK(23, 0))
+	u32 fault;
+	u32 fatal;
+	u64 fault_info;
+	u64 fatal_info;
+	u32 reserved3[10];
+	u32 heap_vt_start;
+	u32 heap_vt_end;
+	u32 reserved4;
+	u32 heap_frag_end;
+	u64 heap_address;
+};
+
+struct panthor_fw_csg_control_iface {
+	u32 features;
+	u32 input_va;
+	u32 output_va;
+	u32 suspend_size;
+	u32 protm_suspend_size;
+	u32 stream_num;
+	u32 stream_stride;
+};
+
+struct panthor_fw_csg_input_iface {
+#define CSG_STATE_MASK				GENMASK(2, 0)
+#define CSG_STATE_TERMINATE			0
+#define CSG_STATE_START				1
+#define CSG_STATE_SUSPEND			2
+#define CSG_STATE_RESUME			3
+#define CSG_ENDPOINT_CONFIG			BIT(4)
+#define CSG_STATUS_UPDATE			BIT(5)
+#define CSG_SYNC_UPDATE				BIT(28)
+#define CSG_IDLE				BIT(29)
+#define CSG_DOORBELL				BIT(30)
+#define CSG_PROGRESS_TIMER_EVENT		BIT(31)
+#define CSG_REQ_MASK				(CSG_STATE_MASK | \
+						 CSG_ENDPOINT_CONFIG | \
+						 CSG_STATUS_UPDATE)
+#define CSG_EVT_MASK				(CSG_SYNC_UPDATE | \
+						 CSG_IDLE | \
+						 CSG_PROGRESS_TIMER_EVENT)
+	u32 req;
+	u32 ack_irq_mask;
+
+	u32 doorbell_req;
+	u32 cs_irq_ack;
+	u32 reserved1[4];
+	u64 allow_compute;
+	u64 allow_fragment;
+	u32 allow_other;
+
+#define CSG_EP_REQ_COMPUTE(x)			((x) & GENMASK(7, 0))
+#define CSG_EP_REQ_FRAGMENT(x)			(((x) << 8) & GENMASK(15, 8))
+#define CSG_EP_REQ_TILER(x)			(((x) << 16) & GENMASK(19, 16))
+#define CSG_EP_REQ_EXCL_COMPUTE			BIT(20)
+#define CSG_EP_REQ_EXCL_FRAGMENT		BIT(21)
+#define CSG_EP_REQ_PRIORITY(x)			(((x) << 28) & GENMASK(31, 28))
+#define CSG_EP_REQ_PRIORITY_MASK		GENMASK(31, 28)
+	u32 endpoint_req;
+	u32 reserved2[2];
+	u64 suspend_buf;
+	u64 protm_suspend_buf;
+	u32 config;
+	u32 iter_trace_config;
+};
+
+struct panthor_fw_csg_output_iface {
+	u32 ack;
+	u32 reserved1;
+	u32 doorbell_ack;
+	u32 cs_irq_req;
+	u32 status_endpoint_current;
+	u32 status_endpoint_req;
+
+#define CSG_STATUS_STATE_IS_IDLE		BIT(0)
+	u32 status_state;
+	u32 resource_dep;
+};
+
+struct panthor_fw_global_control_iface {
+	u32 version;
+	u32 features;
+	u32 input_va;
+	u32 output_va;
+	u32 group_num;
+	u32 group_stride;
+	u32 perfcnt_size;
+	u32 instr_features;
+};
+
+struct panthor_fw_global_input_iface {
+#define GLB_HALT				BIT(0)
+#define GLB_CFG_PROGRESS_TIMER			BIT(1)
+#define GLB_CFG_ALLOC_EN			BIT(2)
+#define GLB_CFG_POWEROFF_TIMER			BIT(3)
+#define GLB_PROTM_ENTER				BIT(4)
+#define GLB_PERFCNT_EN				BIT(5)
+#define GLB_PERFCNT_SAMPLE			BIT(6)
+#define GLB_COUNTER_EN				BIT(7)
+#define GLB_PING				BIT(8)
+#define GLB_FWCFG_UPDATE			BIT(9)
+#define GLB_IDLE_EN				BIT(10)
+#define GLB_SLEEP				BIT(12)
+#define GLB_INACTIVE_COMPUTE			BIT(20)
+#define GLB_INACTIVE_FRAGMENT			BIT(21)
+#define GLB_INACTIVE_TILER			BIT(22)
+#define GLB_PROTM_EXIT				BIT(23)
+#define GLB_PERFCNT_THRESHOLD			BIT(24)
+#define GLB_PERFCNT_OVERFLOW			BIT(25)
+#define GLB_IDLE				BIT(26)
+#define GLB_DBG_CSF				BIT(30)
+#define GLB_DBG_HOST				BIT(31)
+#define GLB_REQ_MASK				GENMASK(10, 0)
+#define GLB_EVT_MASK				GENMASK(26, 20)
+	u32 req;
+	u32 ack_irq_mask;
+	u32 doorbell_req;
+	u32 reserved1;
+	u32 progress_timer;
+
+#define GLB_TIMER_VAL(x)			((x) & GENMASK(30, 0))
+#define GLB_TIMER_SOURCE_GPU_COUNTER		BIT(31)
+	u32 poweroff_timer;
+	u64 core_en_mask;
+	u32 reserved2;
+	u32 perfcnt_as;
+	u64 perfcnt_base;
+	u32 perfcnt_extract;
+	u32 reserved3[3];
+	u32 perfcnt_config;
+	u32 perfcnt_csg_select;
+	u32 perfcnt_fw_enable;
+	u32 perfcnt_csg_enable;
+	u32 perfcnt_csf_enable;
+	u32 perfcnt_shader_enable;
+	u32 perfcnt_tiler_enable;
+	u32 perfcnt_mmu_l2_enable;
+	u32 reserved4[8];
+	u32 idle_timer;
+};
+
+enum panthor_fw_halt_status {
+	PANTHOR_FW_HALT_OK = 0,
+	PANTHOR_FW_HALT_ON_PANIC = 0x4e,
+	PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f,
+};
+
+struct panthor_fw_global_output_iface {
+	u32 ack;
+	u32 reserved1;
+	u32 doorbell_ack;
+	u32 reserved2;
+	u32 halt_status;
+	u32 perfcnt_status;
+	u32 perfcnt_insert;
+};
+
+/**
+ * struct panthor_fw_cs_iface - Firmware command stream slot interface
+ */
+struct panthor_fw_cs_iface {
+	/**
+	 * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req
+	 * field.
+	 *
+	 * Needed so we can update the req field concurrently from the interrupt
+	 * handler and the scheduler logic.
+	 *
+	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
+	 * interface sections are mapped uncached/write-combined right now, and
+	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
+	 * we have 'SHARED' GPU mappings hooked up.
+	 */
+	spinlock_t lock;
+
+	/**
+	 * @control: Command stream slot control interface.
+	 *
+	 * Used to expose command stream slot properties.
+	 *
+	 * This interface is read-only.
+	 */
+	struct panthor_fw_cs_control_iface *control;
+
+	/**
+	 * @input: Command stream slot input interface.
+	 *
+	 * Used for host updates/events.
+	 */
+	struct panthor_fw_cs_input_iface *input;
+
+	/**
+	 * @output: Command stream slot output interface.
+	 *
+	 * Used for FW updates/events.
+	 *
+	 * This interface is read-only.
+	 */
+	const struct panthor_fw_cs_output_iface *output;
+};
+
+/**
+ * struct panthor_fw_csg_iface - Firmware command stream group slot interface
+ */
+struct panthor_fw_csg_iface {
+	/**
+	 * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req
+	 * field.
+	 *
+	 * Needed so we can update the req field concurrently from the interrupt
+	 * handler and the scheduler logic.
+	 *
+	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
+	 * interface sections are mapped uncached/write-combined right now, and
+	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
+	 * we have 'SHARED' GPU mappings hooked up.
+	 */
+	spinlock_t lock;
+
+	/**
+	 * @control: Command stream group slot control interface.
+	 *
+	 * Used to expose command stream group slot properties.
+	 *
+	 * This interface is read-only.
+	 */
+	const struct panthor_fw_csg_control_iface *control;
+
+	/**
+	 * @input: Command stream slot input interface.
+	 *
+	 * Used for host updates/events.
+	 */
+	struct panthor_fw_csg_input_iface *input;
+
+	/**
+	 * @output: Command stream group slot output interface.
+	 *
+	 * Used for FW updates/events.
+	 *
+	 * This interface is read-only.
+	 */
+	const struct panthor_fw_csg_output_iface *output;
+};
+
+/**
+ * struct panthor_fw_global_iface - Firmware global interface
+ */
+struct panthor_fw_global_iface {
+	/**
+	 * @lock: Lock protecting access to the panthor_fw_global_input_iface::req
+	 * field.
+	 *
+	 * Needed so we can update the req field concurrently from the interrupt
+	 * handler and the scheduler/FW management logic.
+	 *
+	 * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW
+	 * interface sections are mapped uncached/write-combined right now, and
+	 * using cmpxchg() on such mappings leads to SError faults. Revisit when
+	 * we have 'SHARED' GPU mappings hooked up.
+	 */
+	spinlock_t lock;
+
+	/**
+	 * @control: Command stream group slot control interface.
+	 *
+	 * Used to expose global FW properties.
+	 *
+	 * This interface is read-only.
+	 */
+	const struct panthor_fw_global_control_iface *control;
+
+	/**
+	 * @input: Global input interface.
+	 *
+	 * Used for host updates/events.
+	 */
+	struct panthor_fw_global_input_iface *input;
+
+	/**
+	 * @output: Global output interface.
+	 *
+	 * Used for FW updates/events.
+	 *
+	 * This interface is read-only.
+	 */
+	const struct panthor_fw_global_output_iface *output;
+};
+
+/**
+ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW
+ * @__iface: The interface to operate on.
+ * @__in_reg: Name of the register to update in the input section of the interface.
+ * @__out_reg: Name of the register to take as a reference in the output section of the
+ * interface.
+ * @__mask: Mask to apply to the update.
+ *
+ * The Host -> FW event/message passing was designed to be lockless, with each side of
+ * the channel having its writeable section. Events are signaled as a difference between
+ * the host and FW side in the req/ack registers (when a bit differs, there's an event
+ * pending, when they are the same, nothing needs attention).
+ *
+ * This helper allows one to update the req register based on the current value of the
+ * ack register managed by the FW. Toggling a specific bit will flag an event. In order
+ * for events to be re-evaluated, the interface doorbell needs to be rung.
+ *
+ * Concurrent accesses to the same req register is covered.
+ *
+ * Anything requiring atomic updates to multiple registers requires a dedicated lock.
+ */
+#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \
+	do { \
+		u32 __cur_val, __new_val, __out_val; \
+		spin_lock(&(__iface)->lock); \
+		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
+		__out_val = READ_ONCE((__iface)->output->__out_reg); \
+		__new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \
+		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
+		spin_unlock(&(__iface)->lock); \
+	} while (0)
+
+/**
+ * panthor_fw_update_reqs() - Update bits to reflect a configuration change
+ * @__iface: The interface to operate on.
+ * @__in_reg: Name of the register to update in the input section of the interface.
+ * @__val: Value to set.
+ * @__mask: Mask to apply to the update.
+ *
+ * Some configuration get passed through req registers that are also used to
+ * send events to the FW. Those req registers being updated from the interrupt
+ * handler, they require special helpers to update the configuration part as well.
+ *
+ * Concurrent accesses to the same req register is covered.
+ *
+ * Anything requiring atomic updates to multiple registers requires a dedicated lock.
+ */
+#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \
+	do { \
+		u32 __cur_val, __new_val; \
+		spin_lock(&(__iface)->lock); \
+		__cur_val = READ_ONCE((__iface)->input->__in_reg); \
+		__new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \
+		WRITE_ONCE((__iface)->input->__in_reg, __new_val); \
+		spin_unlock(&(__iface)->lock); \
+	} while (0)
+
+struct panthor_fw_global_iface *
+panthor_fw_get_glb_iface(struct panthor_device *ptdev);
+
+struct panthor_fw_csg_iface *
+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot);
+
+struct panthor_fw_cs_iface *
+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot);
+
+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask,
+			     u32 *acked, u32 timeout_ms);
+
+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked,
+			     u32 timeout_ms);
+
+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot);
+
+struct panthor_kernel_bo *
+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev,
+				 struct panthor_fw_ringbuf_input_iface **input,
+				 const struct panthor_fw_ringbuf_output_iface **output,
+				 u32 *input_fw_va, u32 *output_fw_va);
+struct panthor_kernel_bo *
+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size);
+
+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev);
+
+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang);
+int panthor_fw_post_reset(struct panthor_device *ptdev);
+
+static inline void panthor_fw_suspend(struct panthor_device *ptdev)
+{
+	panthor_fw_pre_reset(ptdev, false);
+}
+
+static inline int panthor_fw_resume(struct panthor_device *ptdev)
+{
+	return panthor_fw_post_reset(ptdev);
+}
+
+int panthor_fw_init(struct panthor_device *ptdev);
+void panthor_fw_unplug(struct panthor_device *ptdev);
+
+#endif
--
2.42.0


From 14cb6ad1bd3332914ff8a89dca0eafff76b3c5b4 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:23 +0100
Subject: [PATCH 10/69] [MERGED] drm/panthor: Add the heap logical block

Tiler heap growing requires some kernel driver involvement: when the
tiler runs out of heap memory, it will raise an exception which is
either directly handled by the firmware if some free heap chunks are
available in the heap context, or passed back to the kernel otherwise.
The heap helpers will be used by the scheduler logic to allocate more
heap chunks to a heap context, when such a situation happens.

Heap context creation is explicitly requested by userspace (using
the TILER_HEAP_CREATE ioctl), and the returned context is attached to a
queue through some command stream instruction.

All the kernel does is keep the list of heap chunks allocated to a
context, so they can be freed when TILER_HEAP_DESTROY is called, or
extended when the FW requests a new chunk.

v6:
- Add Maxime's and Heiko's acks

v5:
- Fix FIXME comment
- Add Steve's R-b

v4:
- Rework locking to allow concurrent calls to panthor_heap_grow()
- Add a helper to return a heap chunk if we couldn't pass it to the
  FW because the group was scheduled out

v3:
- Add a FIXME for the heap OOM deadlock
- Use the panthor_kernel_bo abstraction for the heap context and heap
  chunks
- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver
- Ensure that the heap context is aligned to the GPU cache line size
- Minor code tidy ups

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-10-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_heap.c | 597 +++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_heap.h |  39 ++
 2 files changed, 636 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c
new file mode 100644
index 000000000000..143fa35f2e74
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_heap.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2023 Collabora ltd. */
+
+#include <linux/iosys-map.h>
+#include <linux/rwsem.h>
+
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_gem.h"
+#include "panthor_heap.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+
+/*
+ * The GPU heap context is an opaque structure used by the GPU to track the
+ * heap allocations. The driver should only touch it to initialize it (zero all
+ * fields). Because the CPU and GPU can both access this structure it is
+ * required to be GPU cache line aligned.
+ */
+#define HEAP_CONTEXT_SIZE	32
+
+/**
+ * struct panthor_heap_chunk_header - Heap chunk header
+ */
+struct panthor_heap_chunk_header {
+	/**
+	 * @next: Next heap chunk in the list.
+	 *
+	 * This is a GPU VA.
+	 */
+	u64 next;
+
+	/** @unknown: MBZ. */
+	u32 unknown[14];
+};
+
+/**
+ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
+ */
+struct panthor_heap_chunk {
+	/** @node: Used to insert the heap chunk in panthor_heap::chunks. */
+	struct list_head node;
+
+	/** @bo: Buffer object backing the heap chunk. */
+	struct panthor_kernel_bo *bo;
+};
+
+/**
+ * struct panthor_heap - Structure used to manage tiler heap contexts.
+ */
+struct panthor_heap {
+	/** @chunks: List containing all heap chunks allocated so far. */
+	struct list_head chunks;
+
+	/** @lock: Lock protecting insertion in the chunks list. */
+	struct mutex lock;
+
+	/** @chunk_size: Size of each chunk. */
+	u32 chunk_size;
+
+	/** @max_chunks: Maximum number of chunks. */
+	u32 max_chunks;
+
+	/**
+	 * @target_in_flight: Number of in-flight render passes after which
+	 * we'd let the FW wait for fragment job to finish instead of allocating new chunks.
+	 */
+	u32 target_in_flight;
+
+	/** @chunk_count: Number of heap chunks currently allocated. */
+	u32 chunk_count;
+};
+
+#define MAX_HEAPS_PER_POOL    128
+
+/**
+ * struct panthor_heap_pool - Pool of heap contexts
+ *
+ * The pool is attached to a panthor_file and can't be shared across processes.
+ */
+struct panthor_heap_pool {
+	/** @refcount: Reference count. */
+	struct kref refcount;
+
+	/** @ptdev: Device. */
+	struct panthor_device *ptdev;
+
+	/** @vm: VM this pool is bound to. */
+	struct panthor_vm *vm;
+
+	/** @lock: Lock protecting access to @xa. */
+	struct rw_semaphore lock;
+
+	/** @xa: Array storing panthor_heap objects. */
+	struct xarray xa;
+
+	/** @gpu_contexts: Buffer object containing the GPU heap contexts. */
+	struct panthor_kernel_bo *gpu_contexts;
+};
+
+static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
+{
+	u32 l2_features = ptdev->gpu_info.l2_features;
+	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
+
+	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
+}
+
+static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
+{
+	return panthor_heap_ctx_stride(pool->ptdev) * id;
+}
+
+static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
+{
+	return pool->gpu_contexts->kmap +
+	       panthor_get_heap_ctx_offset(pool, id);
+}
+
+static void panthor_free_heap_chunk(struct panthor_vm *vm,
+				    struct panthor_heap *heap,
+				    struct panthor_heap_chunk *chunk)
+{
+	mutex_lock(&heap->lock);
+	list_del(&chunk->node);
+	heap->chunk_count--;
+	mutex_unlock(&heap->lock);
+
+	panthor_kernel_bo_destroy(vm, chunk->bo);
+	kfree(chunk);
+}
+
+static int panthor_alloc_heap_chunk(struct panthor_device *ptdev,
+				    struct panthor_vm *vm,
+				    struct panthor_heap *heap,
+				    bool initial_chunk)
+{
+	struct panthor_heap_chunk *chunk;
+	struct panthor_heap_chunk_header *hdr;
+	int ret;
+
+	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
+	if (!chunk)
+		return -ENOMEM;
+
+	chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size,
+					     DRM_PANTHOR_BO_NO_MMAP,
+					     DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+					     PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(chunk->bo)) {
+		ret = PTR_ERR(chunk->bo);
+		goto err_free_chunk;
+	}
+
+	ret = panthor_kernel_bo_vmap(chunk->bo);
+	if (ret)
+		goto err_destroy_bo;
+
+	hdr = chunk->bo->kmap;
+	memset(hdr, 0, sizeof(*hdr));
+
+	if (initial_chunk && !list_empty(&heap->chunks)) {
+		struct panthor_heap_chunk *prev_chunk;
+		u64 prev_gpuva;
+
+		prev_chunk = list_first_entry(&heap->chunks,
+					      struct panthor_heap_chunk,
+					      node);
+
+		prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo);
+		hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
+			    (heap->chunk_size >> 12);
+	}
+
+	panthor_kernel_bo_vunmap(chunk->bo);
+
+	mutex_lock(&heap->lock);
+	list_add(&chunk->node, &heap->chunks);
+	heap->chunk_count++;
+	mutex_unlock(&heap->lock);
+
+	return 0;
+
+err_destroy_bo:
+	panthor_kernel_bo_destroy(vm, chunk->bo);
+
+err_free_chunk:
+	kfree(chunk);
+
+	return ret;
+}
+
+static void panthor_free_heap_chunks(struct panthor_vm *vm,
+				     struct panthor_heap *heap)
+{
+	struct panthor_heap_chunk *chunk, *tmp;
+
+	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
+		panthor_free_heap_chunk(vm, heap, chunk);
+}
+
+static int panthor_alloc_heap_chunks(struct panthor_device *ptdev,
+				     struct panthor_vm *vm,
+				     struct panthor_heap *heap,
+				     u32 chunk_count)
+{
+	int ret;
+	u32 i;
+
+	for (i = 0; i < chunk_count; i++) {
+		ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int
+panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
+{
+	struct panthor_heap *heap;
+
+	heap = xa_erase(&pool->xa, handle);
+	if (!heap)
+		return -EINVAL;
+
+	panthor_free_heap_chunks(pool->vm, heap);
+	mutex_destroy(&heap->lock);
+	kfree(heap);
+	return 0;
+}
+
+/**
+ * panthor_heap_destroy() - Destroy a heap context
+ * @pool: Pool this context belongs to.
+ * @handle: Handle returned by panthor_heap_create().
+ */
+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
+{
+	int ret;
+
+	down_write(&pool->lock);
+	ret = panthor_heap_destroy_locked(pool, handle);
+	up_write(&pool->lock);
+
+	return ret;
+}
+
+/**
+ * panthor_heap_create() - Create a heap context
+ * @pool: Pool to instantiate the heap context from.
+ * @initial_chunk_count: Number of chunk allocated at initialization time.
+ * Must be at least 1.
+ * @chunk_size: The size of each chunk. Must be a power of two between 256k
+ * and 2M.
+ * @max_chunks: Maximum number of chunks that can be allocated.
+ * @target_in_flight: Maximum number of in-flight render passes.
+ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
+ * context.
+ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
+ * assigned to the heap context.
+ *
+ * Return: a positive handle on success, a negative error otherwise.
+ */
+int panthor_heap_create(struct panthor_heap_pool *pool,
+			u32 initial_chunk_count,
+			u32 chunk_size,
+			u32 max_chunks,
+			u32 target_in_flight,
+			u64 *heap_ctx_gpu_va,
+			u64 *first_chunk_gpu_va)
+{
+	struct panthor_heap *heap;
+	struct panthor_heap_chunk *first_chunk;
+	struct panthor_vm *vm;
+	int ret = 0;
+	u32 id;
+
+	if (initial_chunk_count == 0)
+		return -EINVAL;
+
+	if (hweight32(chunk_size) != 1 ||
+	    chunk_size < SZ_256K || chunk_size > SZ_2M)
+		return -EINVAL;
+
+	down_read(&pool->lock);
+	vm = panthor_vm_get(pool->vm);
+	up_read(&pool->lock);
+
+	/* The pool has been destroyed, we can't create a new heap. */
+	if (!vm)
+		return -EINVAL;
+
+	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
+	if (!heap) {
+		ret = -ENOMEM;
+		goto err_put_vm;
+	}
+
+	mutex_init(&heap->lock);
+	INIT_LIST_HEAD(&heap->chunks);
+	heap->chunk_size = chunk_size;
+	heap->max_chunks = max_chunks;
+	heap->target_in_flight = target_in_flight;
+
+	ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap,
+					initial_chunk_count);
+	if (ret)
+		goto err_free_heap;
+
+	first_chunk = list_first_entry(&heap->chunks,
+				       struct panthor_heap_chunk,
+				       node);
+	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo);
+
+	down_write(&pool->lock);
+	/* The pool has been destroyed, we can't create a new heap. */
+	if (!pool->vm) {
+		ret = -EINVAL;
+	} else {
+		ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL);
+		if (!ret) {
+			void *gpu_ctx = panthor_get_heap_ctx(pool, id);
+
+			memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
+			*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) +
+					   panthor_get_heap_ctx_offset(pool, id);
+		}
+	}
+	up_write(&pool->lock);
+
+	if (ret)
+		goto err_free_heap;
+
+	panthor_vm_put(vm);
+	return id;
+
+err_free_heap:
+	panthor_free_heap_chunks(pool->vm, heap);
+	mutex_destroy(&heap->lock);
+	kfree(heap);
+
+err_put_vm:
+	panthor_vm_put(vm);
+	return ret;
+}
+
+/**
+ * panthor_heap_return_chunk() - Return an unused heap chunk
+ * @pool: The pool this heap belongs to.
+ * @heap_gpu_va: The GPU address of the heap context.
+ * @chunk_gpu_va: The chunk VA to return.
+ *
+ * This function is used when a chunk allocated with panthor_heap_grow()
+ * couldn't be linked to the heap context through the FW interface because
+ * the group requesting the allocation was scheduled out in the meantime.
+ */
+int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
+			      u64 heap_gpu_va,
+			      u64 chunk_gpu_va)
+{
+	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
+	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
+	struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
+	struct panthor_heap *heap;
+	int ret;
+
+	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
+		return -EINVAL;
+
+	down_read(&pool->lock);
+	heap = xa_load(&pool->xa, heap_id);
+	if (!heap) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	chunk_gpu_va &= GENMASK_ULL(63, 12);
+
+	mutex_lock(&heap->lock);
+	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
+		if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) {
+			removed = chunk;
+			list_del(&chunk->node);
+			heap->chunk_count--;
+			break;
+		}
+	}
+	mutex_unlock(&heap->lock);
+
+	if (removed) {
+		panthor_kernel_bo_destroy(pool->vm, chunk->bo);
+		kfree(chunk);
+		ret = 0;
+	} else {
+		ret = -EINVAL;
+	}
+
+out_unlock:
+	up_read(&pool->lock);
+	return ret;
+}
+
+/**
+ * panthor_heap_grow() - Make a heap context grow.
+ * @pool: The pool this heap belongs to.
+ * @heap_gpu_va: The GPU address of the heap context.
+ * @renderpasses_in_flight: Number of render passes currently in-flight.
+ * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
+ * @new_chunk_gpu_va: Pointer used to return the chunk VA.
+ */
+int panthor_heap_grow(struct panthor_heap_pool *pool,
+		      u64 heap_gpu_va,
+		      u32 renderpasses_in_flight,
+		      u32 pending_frag_count,
+		      u64 *new_chunk_gpu_va)
+{
+	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts);
+	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev);
+	struct panthor_heap_chunk *chunk;
+	struct panthor_heap *heap;
+	int ret;
+
+	if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
+		return -EINVAL;
+
+	down_read(&pool->lock);
+	heap = xa_load(&pool->xa, heap_id);
+	if (!heap) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* If we reached the target in-flight render passes, or if we
+	 * reached the maximum number of chunks, let the FW figure another way to
+	 * find some memory (wait for render passes to finish, or call the exception
+	 * handler provided by the userspace driver, if any).
+	 */
+	if (renderpasses_in_flight > heap->target_in_flight ||
+	    (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	} else if (heap->chunk_count >= heap->max_chunks) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	/* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
+	 * which goes through the blocking allocation path. Ultimately, we
+	 * want a non-blocking allocation, so we can immediately report to the
+	 * FW when the system is running out of memory. In that case, the FW
+	 * can call a user-provided exception handler, which might try to free
+	 * some tiler memory by issuing an intermediate fragment job. If the
+	 * exception handler can't do anything, it will flag the queue as
+	 * faulty so the job that triggered this tiler chunk allocation and all
+	 * further jobs in this queue fail immediately instead of having to
+	 * wait for the job timeout.
+	 */
+	ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false);
+	if (ret)
+		goto out_unlock;
+
+	chunk = list_first_entry(&heap->chunks,
+				 struct panthor_heap_chunk,
+				 node);
+	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) |
+			    (heap->chunk_size >> 12);
+	ret = 0;
+
+out_unlock:
+	up_read(&pool->lock);
+	return ret;
+}
+
+static void panthor_heap_pool_release(struct kref *refcount)
+{
+	struct panthor_heap_pool *pool =
+		container_of(refcount, struct panthor_heap_pool, refcount);
+
+	xa_destroy(&pool->xa);
+	kfree(pool);
+}
+
+/**
+ * panthor_heap_pool_put() - Release a heap pool reference
+ * @pool: Pool to release the reference on. Can be NULL.
+ */
+void panthor_heap_pool_put(struct panthor_heap_pool *pool)
+{
+	if (pool)
+		kref_put(&pool->refcount, panthor_heap_pool_release);
+}
+
+/**
+ * panthor_heap_pool_get() - Get a heap pool reference
+ * @pool: Pool to get the reference on. Can be NULL.
+ *
+ * Return: @pool.
+ */
+struct panthor_heap_pool *
+panthor_heap_pool_get(struct panthor_heap_pool *pool)
+{
+	if (pool)
+		kref_get(&pool->refcount);
+
+	return pool;
+}
+
+/**
+ * panthor_heap_pool_create() - Create a heap pool
+ * @ptdev: Device.
+ * @vm: The VM this heap pool will be attached to.
+ *
+ * Heap pools might contain up to 128 heap contexts, and are per-VM.
+ *
+ * Return: A valid pointer on success, a negative error code otherwise.
+ */
+struct panthor_heap_pool *
+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
+{
+	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
+			      panthor_heap_ctx_stride(ptdev),
+			      4096);
+	struct panthor_heap_pool *pool;
+	int ret = 0;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		return ERR_PTR(-ENOMEM);
+
+	/* We want a weak ref here: the heap pool belongs to the VM, so we're
+	 * sure that, as long as the heap pool exists, the VM exists too.
+	 */
+	pool->vm = vm;
+	pool->ptdev = ptdev;
+	init_rwsem(&pool->lock);
+	xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1);
+	kref_init(&pool->refcount);
+
+	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize,
+						      DRM_PANTHOR_BO_NO_MMAP,
+						      DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
+						      PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(pool->gpu_contexts)) {
+		ret = PTR_ERR(pool->gpu_contexts);
+		goto err_destroy_pool;
+	}
+
+	ret = panthor_kernel_bo_vmap(pool->gpu_contexts);
+	if (ret)
+		goto err_destroy_pool;
+
+	return pool;
+
+err_destroy_pool:
+	panthor_heap_pool_destroy(pool);
+	return ERR_PTR(ret);
+}
+
+/**
+ * panthor_heap_pool_destroy() - Destroy a heap pool.
+ * @pool: Pool to destroy.
+ *
+ * This function destroys all heap contexts and their resources. Thus
+ * preventing any use of the heap context or the chunk attached to them
+ * after that point.
+ *
+ * If the GPU still has access to some heap contexts, a fault should be
+ * triggered, which should flag the command stream groups using these
+ * context as faulty.
+ *
+ * The heap pool object is only released when all references to this pool
+ * are released.
+ */
+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
+{
+	struct panthor_heap *heap;
+	unsigned long i;
+
+	if (!pool)
+		return;
+
+	down_write(&pool->lock);
+	xa_for_each(&pool->xa, i, heap)
+		drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
+
+	if (!IS_ERR_OR_NULL(pool->gpu_contexts))
+		panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts);
+
+	/* Reflects the fact the pool has been destroyed. */
+	pool->vm = NULL;
+	up_write(&pool->lock);
+
+	panthor_heap_pool_put(pool);
+}
diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h
new file mode 100644
index 000000000000..25a5f2bba445
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_heap.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_HEAP_H__
+#define __PANTHOR_HEAP_H__
+
+#include <linux/types.h>
+
+struct panthor_device;
+struct panthor_heap_pool;
+struct panthor_vm;
+
+int panthor_heap_create(struct panthor_heap_pool *pool,
+			u32 initial_chunk_count,
+			u32 chunk_size,
+			u32 max_chunks,
+			u32 target_in_flight,
+			u64 *heap_ctx_gpu_va,
+			u64 *first_chunk_gpu_va);
+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle);
+
+struct panthor_heap_pool *
+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm);
+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool);
+
+struct panthor_heap_pool *
+panthor_heap_pool_get(struct panthor_heap_pool *pool);
+void panthor_heap_pool_put(struct panthor_heap_pool *pool);
+
+int panthor_heap_grow(struct panthor_heap_pool *pool,
+		      u64 heap_gpu_va,
+		      u32 renderpasses_in_flight,
+		      u32 pending_frag_count,
+		      u64 *new_chunk_gpu_va);
+int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
+			      u64 heap_gpu_va,
+			      u64 chunk_gpu_va);
+
+#endif
--
2.42.0


From 6e8d8732a630616817369e8377328cea530b1cf5 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:24 +0100
Subject: [PATCH 11/69] [MERGED] drm/panthor: Add the scheduler logical block

This is the piece of software interacting with the FW scheduler, and
taking care of some scheduling aspects when the FW comes short of slots
scheduling slots. Indeed, the FW only expose a few slots, and the kernel
has to give all submission contexts, a chance to execute their jobs.

The kernel-side scheduler is timeslice-based, with a round-robin queue
per priority level.

Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler,
allowing us to delegate the dependency tracking to the core.

All the gory details should be documented inline.

v6:
- Add Maxime's and Heiko's acks
- Make sure the scheduler is initialized before queueing the tick work
  in the MMU fault handler
- Keep header inclusion alphabetically ordered

v5:
- Fix typos
- Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally
- Don't move the group to the waiting list tail when it was already
  waiting for a different syncobj
- Fix fatal_queues flagging in the tiler OOM path
- Don't warn when more than one job timesout on a group
- Add a warning message when we fail to allocate a heap chunk
- Add Steve's R-b

v4:
- Check drmm_mutex_init() return code
- s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in
  panthor_queue_put_syncwait_obj()
- Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked()
- Use atomic_xchg() instead of atomic_fetch_and(0)
- Fix typos
- Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs
- Defer TILER_OOM event handling to a separate workqueue to prevent
  deadlocks when the heap chunk allocation is blocked on mem-reclaim.
  This is just a temporary solution, until we add support for
  non-blocking/failable allocations
- Pass the scheduler workqueue to drm_sched instead of instantiating
  a separate one (no longer needed now that heap chunk allocation
  happens on a dedicated wq)
- Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle
  job timeouts when the system is under mem pressure, and hopefully
  free up some memory retained by these jobs

v3:
- Rework the FW event handling logic to avoid races
- Make sure MMU faults kill the group immediately
- Use the panthor_kernel_bo abstraction for group/queue buffers
- Make in_progress an atomic_t, so we can check it without the reset lock
  held
- Don't limit the number of groups per context to the FW scheduler
  capacity. Fix the limit to 128 for now.
- Add a panthor_job_vm() helper
- Account for panthor_vm changes
- Add our job fence as DMA_RESV_USAGE_WRITE to all external objects
  (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given
  we're supposed to be fully-explicit, but other drivers do that, so
  there must be a good reason
- Account for drm_sched changes
- Provide a panthor_queue_put_syncwait_obj()
- Unconditionally return groups to their idle list in
  panthor_sched_suspend()
- Condition of sched_queue_{,delayed_}work fixed to be only when a reset
  isn't pending or in progress.
- Several typos in comments fixed.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-11-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_sched.c | 3502 +++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_sched.h |   50 +
 2 files changed, 3552 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
new file mode 100644
index 000000000000..5f7803b6fc48
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -0,0 +1,3502 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2023 Collabora ltd. */
+
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_managed.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+#include <linux/build_bug.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dma-resv.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/iosys-map.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "panthor_devfreq.h"
+#include "panthor_device.h"
+#include "panthor_fw.h"
+#include "panthor_gem.h"
+#include "panthor_gpu.h"
+#include "panthor_heap.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+#include "panthor_sched.h"
+
+/**
+ * DOC: Scheduler
+ *
+ * Mali CSF hardware adopts a firmware-assisted scheduling model, where
+ * the firmware takes care of scheduling aspects, to some extent.
+ *
+ * The scheduling happens at the scheduling group level, each group
+ * contains 1 to N queues (N is FW/hardware dependent, and exposed
+ * through the firmware interface). Each queue is assigned a command
+ * stream ring buffer, which serves as a way to get jobs submitted to
+ * the GPU, among other things.
+ *
+ * The firmware can schedule a maximum of M groups (M is FW/hardware
+ * dependent, and exposed through the firmware interface). Passed
+ * this maximum number of groups, the kernel must take care of
+ * rotating the groups passed to the firmware so every group gets
+ * a chance to have his queues scheduled for execution.
+ *
+ * The current implementation only supports with kernel-mode queues.
+ * In other terms, userspace doesn't have access to the ring-buffer.
+ * Instead, userspace passes indirect command stream buffers that are
+ * called from the queue ring-buffer by the kernel using a pre-defined
+ * sequence of command stream instructions to ensure the userspace driver
+ * always gets consistent results (cache maintenance,
+ * synchronization, ...).
+ *
+ * We rely on the drm_gpu_scheduler framework to deal with job
+ * dependencies and submission. As any other driver dealing with a
+ * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each
+ * entity has its own job scheduler. When a job is ready to be executed
+ * (all its dependencies are met), it is pushed to the appropriate
+ * queue ring-buffer, and the group is scheduled for execution if it
+ * wasn't already active.
+ *
+ * Kernel-side group scheduling is timeslice-based. When we have less
+ * groups than there are slots, the periodic tick is disabled and we
+ * just let the FW schedule the active groups. When there are more
+ * groups than slots, we let each group a chance to execute stuff for
+ * a given amount of time, and then re-evaluate and pick new groups
+ * to schedule. The group selection algorithm is based on
+ * priority+round-robin.
+ *
+ * Even though user-mode queues is out of the scope right now, the
+ * current design takes them into account by avoiding any guess on the
+ * group/queue state that would be based on information we wouldn't have
+ * if userspace was in charge of the ring-buffer. That's also one of the
+ * reason we don't do 'cooperative' scheduling (encoding FW group slot
+ * reservation as dma_fence that would be returned from the
+ * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as
+ * a queue of waiters, ordered by job submission order). This approach
+ * would work for kernel-mode queues, but would make user-mode queues a
+ * lot more complicated to retrofit.
+ */
+
+#define JOB_TIMEOUT_MS				5000
+
+#define MIN_CS_PER_CSG				8
+
+#define MIN_CSGS				3
+#define MAX_CSG_PRIO				0xf
+
+struct panthor_group;
+
+/**
+ * struct panthor_csg_slot - Command stream group slot
+ *
+ * This represents a FW slot for a scheduling group.
+ */
+struct panthor_csg_slot {
+	/** @group: Scheduling group bound to this slot. */
+	struct panthor_group *group;
+
+	/** @priority: Group priority. */
+	u8 priority;
+
+	/**
+	 * @idle: True if the group bound to this slot is idle.
+	 *
+	 * A group is idle when it has nothing waiting for execution on
+	 * all its queues, or when queues are blocked waiting for something
+	 * to happen (synchronization object).
+	 */
+	bool idle;
+};
+
+/**
+ * enum panthor_csg_priority - Group priority
+ */
+enum panthor_csg_priority {
+	/** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */
+	PANTHOR_CSG_PRIORITY_LOW = 0,
+
+	/** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */
+	PANTHOR_CSG_PRIORITY_MEDIUM,
+
+	/** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */
+	PANTHOR_CSG_PRIORITY_HIGH,
+
+	/**
+	 * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group.
+	 *
+	 * Real-time priority allows one to preempt scheduling of other
+	 * non-real-time groups. When such a group becomes executable,
+	 * it will evict the group with the lowest non-rt priority if
+	 * there's no free group slot available.
+	 *
+	 * Currently not exposed to userspace.
+	 */
+	PANTHOR_CSG_PRIORITY_RT,
+
+	/** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */
+	PANTHOR_CSG_PRIORITY_COUNT,
+};
+
+/**
+ * struct panthor_scheduler - Object used to manage the scheduler
+ */
+struct panthor_scheduler {
+	/** @ptdev: Device. */
+	struct panthor_device *ptdev;
+
+	/**
+	 * @wq: Workqueue used by our internal scheduler logic and
+	 * drm_gpu_scheduler.
+	 *
+	 * Used for the scheduler tick, group update or other kind of FW
+	 * event processing that can't be handled in the threaded interrupt
+	 * path. Also passed to the drm_gpu_scheduler instances embedded
+	 * in panthor_queue.
+	 */
+	struct workqueue_struct *wq;
+
+	/**
+	 * @heap_alloc_wq: Workqueue used to schedule tiler_oom works.
+	 *
+	 * We have a queue dedicated to heap chunk allocation works to avoid
+	 * blocking the rest of the scheduler if the allocation tries to
+	 * reclaim memory.
+	 */
+	struct workqueue_struct *heap_alloc_wq;
+
+	/** @tick_work: Work executed on a scheduling tick. */
+	struct delayed_work tick_work;
+
+	/**
+	 * @sync_upd_work: Work used to process synchronization object updates.
+	 *
+	 * We use this work to unblock queues/groups that were waiting on a
+	 * synchronization object.
+	 */
+	struct work_struct sync_upd_work;
+
+	/**
+	 * @fw_events_work: Work used to process FW events outside the interrupt path.
+	 *
+	 * Even if the interrupt is threaded, we need any event processing
+	 * that require taking the panthor_scheduler::lock to be processed
+	 * outside the interrupt path so we don't block the tick logic when
+	 * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the
+	 * event processing requires taking this lock, we just delegate all
+	 * FW event processing to the scheduler workqueue.
+	 */
+	struct work_struct fw_events_work;
+
+	/**
+	 * @fw_events: Bitmask encoding pending FW events.
+	 */
+	atomic_t fw_events;
+
+	/**
+	 * @resched_target: When the next tick should occur.
+	 *
+	 * Expressed in jiffies.
+	 */
+	u64 resched_target;
+
+	/**
+	 * @last_tick: When the last tick occurred.
+	 *
+	 * Expressed in jiffies.
+	 */
+	u64 last_tick;
+
+	/** @tick_period: Tick period in jiffies. */
+	u64 tick_period;
+
+	/**
+	 * @lock: Lock protecting access to all the scheduler fields.
+	 *
+	 * Should be taken in the tick work, the irq handler, and anywhere the @groups
+	 * fields are touched.
+	 */
+	struct mutex lock;
+
+	/** @groups: Various lists used to classify groups. */
+	struct {
+		/**
+		 * @runnable: Runnable group lists.
+		 *
+		 * When a group has queues that want to execute something,
+		 * its panthor_group::run_node should be inserted here.
+		 *
+		 * One list per-priority.
+		 */
+		struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT];
+
+		/**
+		 * @idle: Idle group lists.
+		 *
+		 * When all queues of a group are idle (either because they
+		 * have nothing to execute, or because they are blocked), the
+		 * panthor_group::run_node field should be inserted here.
+		 *
+		 * One list per-priority.
+		 */
+		struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT];
+
+		/**
+		 * @waiting: List of groups whose queues are blocked on a
+		 * synchronization object.
+		 *
+		 * Insert panthor_group::wait_node here when a group is waiting
+		 * for synchronization objects to be signaled.
+		 *
+		 * This list is evaluated in the @sync_upd_work work.
+		 */
+		struct list_head waiting;
+	} groups;
+
+	/**
+	 * @csg_slots: FW command stream group slots.
+	 */
+	struct panthor_csg_slot csg_slots[MAX_CSGS];
+
+	/** @csg_slot_count: Number of command stream group slots exposed by the FW. */
+	u32 csg_slot_count;
+
+	/** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */
+	u32 cs_slot_count;
+
+	/** @as_slot_count: Number of address space slots supported by the MMU. */
+	u32 as_slot_count;
+
+	/** @used_csg_slot_count: Number of command stream group slot currently used. */
+	u32 used_csg_slot_count;
+
+	/** @sb_slot_count: Number of scoreboard slots. */
+	u32 sb_slot_count;
+
+	/**
+	 * @might_have_idle_groups: True if an active group might have become idle.
+	 *
+	 * This will force a tick, so other runnable groups can be scheduled if one
+	 * or more active groups became idle.
+	 */
+	bool might_have_idle_groups;
+
+	/** @pm: Power management related fields. */
+	struct {
+		/** @has_ref: True if the scheduler owns a runtime PM reference. */
+		bool has_ref;
+	} pm;
+
+	/** @reset: Reset related fields. */
+	struct {
+		/** @lock: Lock protecting the other reset fields. */
+		struct mutex lock;
+
+		/**
+		 * @in_progress: True if a reset is in progress.
+		 *
+		 * Set to true in panthor_sched_pre_reset() and back to false in
+		 * panthor_sched_post_reset().
+		 */
+		atomic_t in_progress;
+
+		/**
+		 * @stopped_groups: List containing all groups that were stopped
+		 * before a reset.
+		 *
+		 * Insert panthor_group::run_node in the pre_reset path.
+		 */
+		struct list_head stopped_groups;
+	} reset;
+};
+
+/**
+ * struct panthor_syncobj_32b - 32-bit FW synchronization object
+ */
+struct panthor_syncobj_32b {
+	/** @seqno: Sequence number. */
+	u32 seqno;
+
+	/**
+	 * @status: Status.
+	 *
+	 * Not zero on failure.
+	 */
+	u32 status;
+};
+
+/**
+ * struct panthor_syncobj_64b - 64-bit FW synchronization object
+ */
+struct panthor_syncobj_64b {
+	/** @seqno: Sequence number. */
+	u64 seqno;
+
+	/**
+	 * @status: Status.
+	 *
+	 * Not zero on failure.
+	 */
+	u32 status;
+
+	/** @pad: MBZ. */
+	u32 pad;
+};
+
+/**
+ * struct panthor_queue - Execution queue
+ */
+struct panthor_queue {
+	/** @scheduler: DRM scheduler used for this queue. */
+	struct drm_gpu_scheduler scheduler;
+
+	/** @entity: DRM scheduling entity used for this queue. */
+	struct drm_sched_entity entity;
+
+	/**
+	 * @remaining_time: Time remaining before the job timeout expires.
+	 *
+	 * The job timeout is suspended when the queue is not scheduled by the
+	 * FW. Every time we suspend the timer, we need to save the remaining
+	 * time so we can restore it later on.
+	 */
+	unsigned long remaining_time;
+
+	/** @timeout_suspended: True if the job timeout was suspended. */
+	bool timeout_suspended;
+
+	/**
+	 * @doorbell_id: Doorbell assigned to this queue.
+	 *
+	 * Right now, all groups share the same doorbell, and the doorbell ID
+	 * is assigned to group_slot + 1 when the group is assigned a slot. But
+	 * we might decide to provide fine grained doorbell assignment at some
+	 * point, so don't have to wake up all queues in a group every time one
+	 * of them is updated.
+	 */
+	u8 doorbell_id;
+
+	/**
+	 * @priority: Priority of the queue inside the group.
+	 *
+	 * Must be less than 16 (Only 4 bits available).
+	 */
+	u8 priority;
+#define CSF_MAX_QUEUE_PRIO	GENMASK(3, 0)
+
+	/** @ringbuf: Command stream ring-buffer. */
+	struct panthor_kernel_bo *ringbuf;
+
+	/** @iface: Firmware interface. */
+	struct {
+		/** @mem: FW memory allocated for this interface. */
+		struct panthor_kernel_bo *mem;
+
+		/** @input: Input interface. */
+		struct panthor_fw_ringbuf_input_iface *input;
+
+		/** @output: Output interface. */
+		const struct panthor_fw_ringbuf_output_iface *output;
+
+		/** @input_fw_va: FW virtual address of the input interface buffer. */
+		u32 input_fw_va;
+
+		/** @output_fw_va: FW virtual address of the output interface buffer. */
+		u32 output_fw_va;
+	} iface;
+
+	/**
+	 * @syncwait: Stores information about the synchronization object this
+	 * queue is waiting on.
+	 */
+	struct {
+		/** @gpu_va: GPU address of the synchronization object. */
+		u64 gpu_va;
+
+		/** @ref: Reference value to compare against. */
+		u64 ref;
+
+		/** @gt: True if this is a greater-than test. */
+		bool gt;
+
+		/** @sync64: True if this is a 64-bit sync object. */
+		bool sync64;
+
+		/** @bo: Buffer object holding the synchronization object. */
+		struct drm_gem_object *obj;
+
+		/** @offset: Offset of the synchronization object inside @bo. */
+		u64 offset;
+
+		/**
+		 * @kmap: Kernel mapping of the buffer object holding the
+		 * synchronization object.
+		 */
+		void *kmap;
+	} syncwait;
+
+	/** @fence_ctx: Fence context fields. */
+	struct {
+		/** @lock: Used to protect access to all fences allocated by this context. */
+		spinlock_t lock;
+
+		/**
+		 * @id: Fence context ID.
+		 *
+		 * Allocated with dma_fence_context_alloc().
+		 */
+		u64 id;
+
+		/** @seqno: Sequence number of the last initialized fence. */
+		atomic64_t seqno;
+
+		/**
+		 * @in_flight_jobs: List containing all in-flight jobs.
+		 *
+		 * Used to keep track and signal panthor_job::done_fence when the
+		 * synchronization object attached to the queue is signaled.
+		 */
+		struct list_head in_flight_jobs;
+	} fence_ctx;
+};
+
+/**
+ * enum panthor_group_state - Scheduling group state.
+ */
+enum panthor_group_state {
+	/** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */
+	PANTHOR_CS_GROUP_CREATED,
+
+	/** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */
+	PANTHOR_CS_GROUP_ACTIVE,
+
+	/**
+	 * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is
+	 * inactive/suspended right now.
+	 */
+	PANTHOR_CS_GROUP_SUSPENDED,
+
+	/**
+	 * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated.
+	 *
+	 * Can no longer be scheduled. The only allowed action is a destruction.
+	 */
+	PANTHOR_CS_GROUP_TERMINATED,
+};
+
+/**
+ * struct panthor_group - Scheduling group object
+ */
+struct panthor_group {
+	/** @refcount: Reference count */
+	struct kref refcount;
+
+	/** @ptdev: Device. */
+	struct panthor_device *ptdev;
+
+	/** @vm: VM bound to the group. */
+	struct panthor_vm *vm;
+
+	/** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */
+	u64 compute_core_mask;
+
+	/** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */
+	u64 fragment_core_mask;
+
+	/** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */
+	u64 tiler_core_mask;
+
+	/** @max_compute_cores: Maximum number of shader cores used for compute jobs. */
+	u8 max_compute_cores;
+
+	/** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */
+	u8 max_fragment_cores;
+
+	/** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */
+	u8 max_tiler_cores;
+
+	/** @priority: Group priority (check panthor_csg_priority). */
+	u8 priority;
+
+	/** @blocked_queues: Bitmask reflecting the blocked queues. */
+	u32 blocked_queues;
+
+	/** @idle_queues: Bitmask reflecting the idle queues. */
+	u32 idle_queues;
+
+	/** @fatal_lock: Lock used to protect access to fatal fields. */
+	spinlock_t fatal_lock;
+
+	/** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */
+	u32 fatal_queues;
+
+	/** @tiler_oom: Mask of queues that have a tiler OOM event to process. */
+	atomic_t tiler_oom;
+
+	/** @queue_count: Number of queues in this group. */
+	u32 queue_count;
+
+	/** @queues: Queues owned by this group. */
+	struct panthor_queue *queues[MAX_CS_PER_CSG];
+
+	/**
+	 * @csg_id: ID of the FW group slot.
+	 *
+	 * -1 when the group is not scheduled/active.
+	 */
+	int csg_id;
+
+	/**
+	 * @destroyed: True when the group has been destroyed.
+	 *
+	 * If a group is destroyed it becomes useless: no further jobs can be submitted
+	 * to its queues. We simply wait for all references to be dropped so we can
+	 * release the group object.
+	 */
+	bool destroyed;
+
+	/**
+	 * @timedout: True when a timeout occurred on any of the queues owned by
+	 * this group.
+	 *
+	 * Timeouts can be reported by drm_sched or by the FW. In any case, any
+	 * timeout situation is unrecoverable, and the group becomes useless.
+	 * We simply wait for all references to be dropped so we can release the
+	 * group object.
+	 */
+	bool timedout;
+
+	/**
+	 * @syncobjs: Pool of per-queue synchronization objects.
+	 *
+	 * One sync object per queue. The position of the sync object is
+	 * determined by the queue index.
+	 */
+	struct panthor_kernel_bo *syncobjs;
+
+	/** @state: Group state. */
+	enum panthor_group_state state;
+
+	/**
+	 * @suspend_buf: Suspend buffer.
+	 *
+	 * Stores the state of the group and its queues when a group is suspended.
+	 * Used at resume time to restore the group in its previous state.
+	 *
+	 * The size of the suspend buffer is exposed through the FW interface.
+	 */
+	struct panthor_kernel_bo *suspend_buf;
+
+	/**
+	 * @protm_suspend_buf: Protection mode suspend buffer.
+	 *
+	 * Stores the state of the group and its queues when a group that's in
+	 * protection mode is suspended.
+	 *
+	 * Used at resume time to restore the group in its previous state.
+	 *
+	 * The size of the protection mode suspend buffer is exposed through the
+	 * FW interface.
+	 */
+	struct panthor_kernel_bo *protm_suspend_buf;
+
+	/** @sync_upd_work: Work used to check/signal job fences. */
+	struct work_struct sync_upd_work;
+
+	/** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */
+	struct work_struct tiler_oom_work;
+
+	/** @term_work: Work used to finish the group termination procedure. */
+	struct work_struct term_work;
+
+	/**
+	 * @release_work: Work used to release group resources.
+	 *
+	 * We need to postpone the group release to avoid a deadlock when
+	 * the last ref is released in the tick work.
+	 */
+	struct work_struct release_work;
+
+	/**
+	 * @run_node: Node used to insert the group in the
+	 * panthor_group::groups::{runnable,idle} and
+	 * panthor_group::reset.stopped_groups lists.
+	 */
+	struct list_head run_node;
+
+	/**
+	 * @wait_node: Node used to insert the group in the
+	 * panthor_group::groups::waiting list.
+	 */
+	struct list_head wait_node;
+};
+
+/**
+ * group_queue_work() - Queue a group work
+ * @group: Group to queue the work for.
+ * @wname: Work name.
+ *
+ * Grabs a ref and queue a work item to the scheduler workqueue. If
+ * the work was already queued, we release the reference we grabbed.
+ *
+ * Work callbacks must release the reference we grabbed here.
+ */
+#define group_queue_work(group, wname) \
+	do { \
+		group_get(group); \
+		if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \
+			group_put(group); \
+	} while (0)
+
+/**
+ * sched_queue_work() - Queue a scheduler work.
+ * @sched: Scheduler object.
+ * @wname: Work name.
+ *
+ * Conditionally queues a scheduler work if no reset is pending/in-progress.
+ */
+#define sched_queue_work(sched, wname) \
+	do { \
+		if (!atomic_read(&(sched)->reset.in_progress) && \
+		    !panthor_device_reset_is_pending((sched)->ptdev)) \
+			queue_work((sched)->wq, &(sched)->wname ## _work); \
+	} while (0)
+
+/**
+ * sched_queue_delayed_work() - Queue a scheduler delayed work.
+ * @sched: Scheduler object.
+ * @wname: Work name.
+ * @delay: Work delay in jiffies.
+ *
+ * Conditionally queues a scheduler delayed work if no reset is
+ * pending/in-progress.
+ */
+#define sched_queue_delayed_work(sched, wname, delay) \
+	do { \
+		if (!atomic_read(&sched->reset.in_progress) && \
+		    !panthor_device_reset_is_pending((sched)->ptdev)) \
+			mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \
+	} while (0)
+
+/*
+ * We currently set the maximum of groups per file to an arbitrary low value.
+ * But this can be updated if we need more.
+ */
+#define MAX_GROUPS_PER_POOL 128
+
+/**
+ * struct panthor_group_pool - Group pool
+ *
+ * Each file get assigned a group pool.
+ */
+struct panthor_group_pool {
+	/** @xa: Xarray used to manage group handles. */
+	struct xarray xa;
+};
+
+/**
+ * struct panthor_job - Used to manage GPU job
+ */
+struct panthor_job {
+	/** @base: Inherit from drm_sched_job. */
+	struct drm_sched_job base;
+
+	/** @refcount: Reference count. */
+	struct kref refcount;
+
+	/** @group: Group of the queue this job will be pushed to. */
+	struct panthor_group *group;
+
+	/** @queue_idx: Index of the queue inside @group. */
+	u32 queue_idx;
+
+	/** @call_info: Information about the userspace command stream call. */
+	struct {
+		/** @start: GPU address of the userspace command stream. */
+		u64 start;
+
+		/** @size: Size of the userspace command stream. */
+		u32 size;
+
+		/**
+		 * @latest_flush: Flush ID at the time the userspace command
+		 * stream was built.
+		 *
+		 * Needed for the flush reduction mechanism.
+		 */
+		u32 latest_flush;
+	} call_info;
+
+	/** @ringbuf: Position of this job is in the ring buffer. */
+	struct {
+		/** @start: Start offset. */
+		u64 start;
+
+		/** @end: End offset. */
+		u64 end;
+	} ringbuf;
+
+	/**
+	 * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs
+	 * list.
+	 */
+	struct list_head node;
+
+	/** @done_fence: Fence signaled when the job is finished or cancelled. */
+	struct dma_fence *done_fence;
+};
+
+static void
+panthor_queue_put_syncwait_obj(struct panthor_queue *queue)
+{
+	if (queue->syncwait.kmap) {
+		struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap);
+
+		drm_gem_vunmap_unlocked(queue->syncwait.obj, &map);
+		queue->syncwait.kmap = NULL;
+	}
+
+	drm_gem_object_put(queue->syncwait.obj);
+	queue->syncwait.obj = NULL;
+}
+
+static void *
+panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_gem_object *bo;
+	struct iosys_map map;
+	int ret;
+
+	if (queue->syncwait.kmap)
+		return queue->syncwait.kmap + queue->syncwait.offset;
+
+	bo = panthor_vm_get_bo_for_va(group->vm,
+				      queue->syncwait.gpu_va,
+				      &queue->syncwait.offset);
+	if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
+		goto err_put_syncwait_obj;
+
+	queue->syncwait.obj = &bo->base.base;
+	ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map);
+	if (drm_WARN_ON(&ptdev->base, ret))
+		goto err_put_syncwait_obj;
+
+	queue->syncwait.kmap = map.vaddr;
+	if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
+		goto err_put_syncwait_obj;
+
+	return queue->syncwait.kmap + queue->syncwait.offset;
+
+err_put_syncwait_obj:
+	panthor_queue_put_syncwait_obj(queue);
+	return NULL;
+}
+
+static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue)
+{
+	if (IS_ERR_OR_NULL(queue))
+		return;
+
+	if (queue->entity.fence_context)
+		drm_sched_entity_destroy(&queue->entity);
+
+	if (queue->scheduler.ops)
+		drm_sched_fini(&queue->scheduler);
+
+	panthor_queue_put_syncwait_obj(queue);
+
+	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
+	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
+
+	kfree(queue);
+}
+
+static void group_release_work(struct work_struct *work)
+{
+	struct panthor_group *group = container_of(work,
+						   struct panthor_group,
+						   release_work);
+	struct panthor_device *ptdev = group->ptdev;
+	u32 i;
+
+	for (i = 0; i < group->queue_count; i++)
+		group_free_queue(group, group->queues[i]);
+
+	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
+	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
+	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
+
+	panthor_vm_put(group->vm);
+	kfree(group);
+}
+
+static void group_release(struct kref *kref)
+{
+	struct panthor_group *group = container_of(kref,
+						   struct panthor_group,
+						   refcount);
+	struct panthor_device *ptdev = group->ptdev;
+
+	drm_WARN_ON(&ptdev->base, group->csg_id >= 0);
+	drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node));
+	drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node));
+
+	queue_work(panthor_cleanup_wq, &group->release_work);
+}
+
+static void group_put(struct panthor_group *group)
+{
+	if (group)
+		kref_put(&group->refcount, group_release);
+}
+
+static struct panthor_group *
+group_get(struct panthor_group *group)
+{
+	if (group)
+		kref_get(&group->refcount);
+
+	return group;
+}
+
+/**
+ * group_bind_locked() - Bind a group to a group slot
+ * @group: Group.
+ * @csg_id: Slot.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+group_bind_locked(struct panthor_group *group, u32 csg_id)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_csg_slot *csg_slot;
+	int ret;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS ||
+			ptdev->scheduler->csg_slots[csg_id].group))
+		return -EINVAL;
+
+	ret = panthor_vm_active(group->vm);
+	if (ret)
+		return ret;
+
+	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	group_get(group);
+	group->csg_id = csg_id;
+
+	/* Dummy doorbell allocation: doorbell is assigned to the group and
+	 * all queues use the same doorbell.
+	 *
+	 * TODO: Implement LRU-based doorbell assignment, so the most often
+	 * updated queues get their own doorbell, thus avoiding useless checks
+	 * on queues belonging to the same group that are rarely updated.
+	 */
+	for (u32 i = 0; i < group->queue_count; i++)
+		group->queues[i]->doorbell_id = csg_id + 1;
+
+	csg_slot->group = group;
+
+	return 0;
+}
+
+/**
+ * group_unbind_locked() - Unbind a group from a slot.
+ * @group: Group to unbind.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+group_unbind_locked(struct panthor_group *group)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_csg_slot *slot;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS))
+		return -EINVAL;
+
+	if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE))
+		return -EINVAL;
+
+	slot = &ptdev->scheduler->csg_slots[group->csg_id];
+	panthor_vm_idle(group->vm);
+	group->csg_id = -1;
+
+	/* Tiler OOM events will be re-issued next time the group is scheduled. */
+	atomic_set(&group->tiler_oom, 0);
+	cancel_work(&group->tiler_oom_work);
+
+	for (u32 i = 0; i < group->queue_count; i++)
+		group->queues[i]->doorbell_id = -1;
+
+	slot->group = NULL;
+
+	group_put(group);
+	return 0;
+}
+
+/**
+ * cs_slot_prog_locked() - Program a queue slot
+ * @ptdev: Device.
+ * @csg_id: Group slot ID.
+ * @cs_id: Queue slot ID.
+ *
+ * Program a queue slot with the queue information so things can start being
+ * executed on this queue.
+ *
+ * The group slot must have a group bound to it already (group_bind_locked()).
+ */
+static void
+cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
+{
+	struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id];
+	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	queue->iface.input->extract = queue->iface.output->extract;
+	drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract);
+
+	cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
+	cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
+	cs_iface->input->ringbuf_input = queue->iface.input_fw_va;
+	cs_iface->input->ringbuf_output = queue->iface.output_fw_va;
+	cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) |
+				  CS_CONFIG_DOORBELL(queue->doorbell_id);
+	cs_iface->input->ack_irq_mask = ~0;
+	panthor_fw_update_reqs(cs_iface, req,
+			       CS_IDLE_SYNC_WAIT |
+			       CS_IDLE_EMPTY |
+			       CS_STATE_START |
+			       CS_EXTRACT_EVENT,
+			       CS_IDLE_SYNC_WAIT |
+			       CS_IDLE_EMPTY |
+			       CS_STATE_MASK |
+			       CS_EXTRACT_EVENT);
+	if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) {
+		drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time);
+		queue->timeout_suspended = false;
+	}
+}
+
+/**
+ * @cs_slot_reset_locked() - Reset a queue slot
+ * @ptdev: Device.
+ * @csg_id: Group slot.
+ * @cs_id: Queue slot.
+ *
+ * Change the queue slot state to STOP and suspend the queue timeout if
+ * the queue is not blocked.
+ *
+ * The group slot must have a group bound to it (group_bind_locked()).
+ */
+static int
+cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
+{
+	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
+	struct panthor_queue *queue = group->queues[cs_id];
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	panthor_fw_update_reqs(cs_iface, req,
+			       CS_STATE_STOP,
+			       CS_STATE_MASK);
+
+	/* If the queue is blocked, we want to keep the timeout running, so
+	 * we can detect unbounded waits and kill the group when that happens.
+	 */
+	if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) {
+		queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
+		queue->timeout_suspended = true;
+		WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS));
+	}
+
+	return 0;
+}
+
+/**
+ * csg_slot_sync_priority_locked() - Synchronize the group slot priority
+ * @ptdev: Device.
+ * @csg_id: Group slot ID.
+ *
+ * Group slot priority update happens asynchronously. When we receive a
+ * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can
+ * reflect it to our panthor_csg_slot object.
+ */
+static void
+csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	struct panthor_fw_csg_iface *csg_iface;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+	csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28;
+}
+
+/**
+ * cs_slot_sync_queue_state_locked() - Synchronize the queue slot priority
+ * @ptdev: Device.
+ * @csg_id: Group slot.
+ * @cs_id: Queue slot.
+ *
+ * Queue state is updated on group suspend or STATUS_UPDATE event.
+ */
+static void
+cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id)
+{
+	struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group;
+	struct panthor_queue *queue = group->queues[cs_id];
+	struct panthor_fw_cs_iface *cs_iface =
+		panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
+
+	u32 status_wait_cond;
+
+	switch (cs_iface->output->status_blocked_reason) {
+	case CS_STATUS_BLOCKED_REASON_UNBLOCKED:
+		if (queue->iface.input->insert == queue->iface.output->extract &&
+		    cs_iface->output->status_scoreboards == 0)
+			group->idle_queues |= BIT(cs_id);
+		break;
+
+	case CS_STATUS_BLOCKED_REASON_SYNC_WAIT:
+		if (list_empty(&group->wait_node)) {
+			list_move_tail(&group->wait_node,
+				       &group->ptdev->scheduler->groups.waiting);
+		}
+		group->blocked_queues |= BIT(cs_id);
+		queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr;
+		queue->syncwait.ref = cs_iface->output->status_wait_sync_value;
+		status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK;
+		queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT;
+		if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) {
+			u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi;
+
+			queue->syncwait.sync64 = true;
+			queue->syncwait.ref |= sync_val_hi << 32;
+		} else {
+			queue->syncwait.sync64 = false;
+		}
+		break;
+
+	default:
+		/* Other reasons are not blocking. Consider the queue as runnable
+		 * in those cases.
+		 */
+		break;
+	}
+}
+
+static void
+csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+	u32 i;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	group->idle_queues = 0;
+	group->blocked_queues = 0;
+
+	for (i = 0; i < group->queue_count; i++) {
+		if (group->queues[i])
+			cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
+	}
+}
+
+static void
+csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	struct panthor_fw_csg_iface *csg_iface;
+	struct panthor_group *group;
+	enum panthor_group_state new_state, old_state;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+	group = csg_slot->group;
+
+	if (!group)
+		return;
+
+	old_state = group->state;
+	switch (csg_iface->output->ack & CSG_STATE_MASK) {
+	case CSG_STATE_START:
+	case CSG_STATE_RESUME:
+		new_state = PANTHOR_CS_GROUP_ACTIVE;
+		break;
+	case CSG_STATE_TERMINATE:
+		new_state = PANTHOR_CS_GROUP_TERMINATED;
+		break;
+	case CSG_STATE_SUSPEND:
+		new_state = PANTHOR_CS_GROUP_SUSPENDED;
+		break;
+	}
+
+	if (old_state == new_state)
+		return;
+
+	if (new_state == PANTHOR_CS_GROUP_SUSPENDED)
+		csg_slot_sync_queues_state_locked(ptdev, csg_id);
+
+	if (old_state == PANTHOR_CS_GROUP_ACTIVE) {
+		u32 i;
+
+		/* Reset the queue slots so we start from a clean
+		 * state when starting/resuming a new group on this
+		 * CSG slot. No wait needed here, and no ringbell
+		 * either, since the CS slot will only be re-used
+		 * on the next CSG start operation.
+		 */
+		for (i = 0; i < group->queue_count; i++) {
+			if (group->queues[i])
+				cs_slot_reset_locked(ptdev, csg_id, i);
+		}
+	}
+
+	group->state = new_state;
+}
+
+static int
+csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority)
+{
+	struct panthor_fw_csg_iface *csg_iface;
+	struct panthor_csg_slot *csg_slot;
+	struct panthor_group *group;
+	u32 queue_mask = 0, i;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	if (priority > MAX_CSG_PRIO)
+		return -EINVAL;
+
+	if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS))
+		return -EINVAL;
+
+	csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	group = csg_slot->group;
+	if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE)
+		return 0;
+
+	csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
+
+	for (i = 0; i < group->queue_count; i++) {
+		if (group->queues[i]) {
+			cs_slot_prog_locked(ptdev, csg_id, i);
+			queue_mask |= BIT(i);
+		}
+	}
+
+	csg_iface->input->allow_compute = group->compute_core_mask;
+	csg_iface->input->allow_fragment = group->fragment_core_mask;
+	csg_iface->input->allow_other = group->tiler_core_mask;
+	csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) |
+					 CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) |
+					 CSG_EP_REQ_TILER(group->max_tiler_cores) |
+					 CSG_EP_REQ_PRIORITY(priority);
+	csg_iface->input->config = panthor_vm_as(group->vm);
+
+	if (group->suspend_buf)
+		csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
+	else
+		csg_iface->input->suspend_buf = 0;
+
+	if (group->protm_suspend_buf) {
+		csg_iface->input->protm_suspend_buf =
+			panthor_kernel_bo_gpuva(group->protm_suspend_buf);
+	} else {
+		csg_iface->input->protm_suspend_buf = 0;
+	}
+
+	csg_iface->input->ack_irq_mask = ~0;
+	panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask);
+	return 0;
+}
+
+static void
+cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
+				   u32 csg_id, u32 cs_id)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+	struct panthor_fw_cs_iface *cs_iface;
+	u32 fatal;
+	u64 info;
+
+	lockdep_assert_held(&sched->lock);
+
+	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+	fatal = cs_iface->output->fatal;
+	info = cs_iface->output->fatal_info;
+
+	if (group)
+		group->fatal_queues |= BIT(cs_id);
+
+	sched_queue_delayed_work(sched, tick, 0);
+	drm_warn(&ptdev->base,
+		 "CSG slot %d CS slot: %d\n"
+		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
+		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
+		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
+		 csg_id, cs_id,
+		 (unsigned int)CS_EXCEPTION_TYPE(fatal),
+		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)),
+		 (unsigned int)CS_EXCEPTION_DATA(fatal),
+		 info);
+}
+
+static void
+cs_slot_process_fault_event_locked(struct panthor_device *ptdev,
+				   u32 csg_id, u32 cs_id)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+	struct panthor_queue *queue = group && cs_id < group->queue_count ?
+				      group->queues[cs_id] : NULL;
+	struct panthor_fw_cs_iface *cs_iface;
+	u32 fault;
+	u64 info;
+
+	lockdep_assert_held(&sched->lock);
+
+	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+	fault = cs_iface->output->fault;
+	info = cs_iface->output->fault_info;
+
+	if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) {
+		u64 cs_extract = queue->iface.output->extract;
+		struct panthor_job *job;
+
+		spin_lock(&queue->fence_ctx.lock);
+		list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) {
+			if (cs_extract >= job->ringbuf.end)
+				continue;
+
+			if (cs_extract < job->ringbuf.start)
+				break;
+
+			dma_fence_set_error(job->done_fence, -EINVAL);
+		}
+		spin_unlock(&queue->fence_ctx.lock);
+	}
+
+	drm_warn(&ptdev->base,
+		 "CSG slot %d CS slot: %d\n"
+		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
+		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
+		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
+		 csg_id, cs_id,
+		 (unsigned int)CS_EXCEPTION_TYPE(fault),
+		 panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)),
+		 (unsigned int)CS_EXCEPTION_DATA(fault),
+		 info);
+}
+
+static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	u32 renderpasses_in_flight, pending_frag_count;
+	struct panthor_heap_pool *heaps = NULL;
+	u64 heap_address, new_chunk_va = 0;
+	u32 vt_start, vt_end, frag_end;
+	int ret, csg_id;
+
+	mutex_lock(&sched->lock);
+	csg_id = group->csg_id;
+	if (csg_id >= 0) {
+		struct panthor_fw_cs_iface *cs_iface;
+
+		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+		heaps = panthor_vm_get_heap_pool(group->vm, false);
+		heap_address = cs_iface->output->heap_address;
+		vt_start = cs_iface->output->heap_vt_start;
+		vt_end = cs_iface->output->heap_vt_end;
+		frag_end = cs_iface->output->heap_frag_end;
+		renderpasses_in_flight = vt_start - frag_end;
+		pending_frag_count = vt_end - frag_end;
+	}
+	mutex_unlock(&sched->lock);
+
+	/* The group got scheduled out, we stop here. We will get a new tiler OOM event
+	 * when it's scheduled again.
+	 */
+	if (unlikely(csg_id < 0))
+		return 0;
+
+	if (!heaps || frag_end > vt_end || vt_end >= vt_start) {
+		ret = -EINVAL;
+	} else {
+		/* We do the allocation without holding the scheduler lock to avoid
+		 * blocking the scheduling.
+		 */
+		ret = panthor_heap_grow(heaps, heap_address,
+					renderpasses_in_flight,
+					pending_frag_count, &new_chunk_va);
+	}
+
+	if (ret && ret != -EBUSY) {
+		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
+		group->fatal_queues |= BIT(cs_id);
+		sched_queue_delayed_work(sched, tick, 0);
+		goto out_put_heap_pool;
+	}
+
+	mutex_lock(&sched->lock);
+	csg_id = group->csg_id;
+	if (csg_id >= 0) {
+		struct panthor_fw_csg_iface *csg_iface;
+		struct panthor_fw_cs_iface *cs_iface;
+
+		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+		cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+
+		cs_iface->input->heap_start = new_chunk_va;
+		cs_iface->input->heap_end = new_chunk_va;
+		panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM);
+		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id));
+		panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
+	}
+	mutex_unlock(&sched->lock);
+
+	/* We allocated a chunck, but couldn't link it to the heap
+	 * context because the group was scheduled out while we were
+	 * allocating memory. We need to return this chunk to the heap.
+	 */
+	if (unlikely(csg_id < 0 && new_chunk_va))
+		panthor_heap_return_chunk(heaps, heap_address, new_chunk_va);
+
+	ret = 0;
+
+out_put_heap_pool:
+	panthor_heap_pool_put(heaps);
+	return ret;
+}
+
+static void group_tiler_oom_work(struct work_struct *work)
+{
+	struct panthor_group *group =
+		container_of(work, struct panthor_group, tiler_oom_work);
+	u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0);
+
+	while (tiler_oom) {
+		u32 cs_id = ffs(tiler_oom) - 1;
+
+		group_process_tiler_oom(group, cs_id);
+		tiler_oom &= ~BIT(cs_id);
+	}
+
+	group_put(group);
+}
+
+static void
+cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev,
+				       u32 csg_id, u32 cs_id)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+
+	lockdep_assert_held(&sched->lock);
+
+	if (drm_WARN_ON(&ptdev->base, !group))
+		return;
+
+	atomic_or(BIT(cs_id), &group->tiler_oom);
+
+	/* We don't use group_queue_work() here because we want to queue the
+	 * work item to the heap_alloc_wq.
+	 */
+	group_get(group);
+	if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work))
+		group_put(group);
+}
+
+static bool cs_slot_process_irq_locked(struct panthor_device *ptdev,
+				       u32 csg_id, u32 cs_id)
+{
+	struct panthor_fw_cs_iface *cs_iface;
+	u32 req, ack, events;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
+	req = cs_iface->input->req;
+	ack = cs_iface->output->ack;
+	events = (req ^ ack) & CS_EVT_MASK;
+
+	if (events & CS_FATAL)
+		cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id);
+
+	if (events & CS_FAULT)
+		cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id);
+
+	if (events & CS_TILER_OOM)
+		cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id);
+
+	/* We don't acknowledge the TILER_OOM event since its handling is
+	 * deferred to a separate work.
+	 */
+	panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT);
+
+	return (events & (CS_FAULT | CS_TILER_OOM)) != 0;
+}
+
+static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	struct panthor_fw_csg_iface *csg_iface;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+	csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE;
+}
+
+static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+
+	lockdep_assert_held(&sched->lock);
+
+	sched->might_have_idle_groups = true;
+
+	/* Schedule a tick so we can evict idle groups and schedule non-idle
+	 * ones. This will also update runtime PM and devfreq busy/idle states,
+	 * so the device can lower its frequency or get suspended.
+	 */
+	sched_queue_delayed_work(sched, tick, 0);
+}
+
+static void csg_slot_sync_update_locked(struct panthor_device *ptdev,
+					u32 csg_id)
+{
+	struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	if (group)
+		group_queue_work(group, sync_upd);
+
+	sched_queue_work(ptdev->scheduler, sync_upd);
+}
+
+static void
+csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+	struct panthor_group *group = csg_slot->group;
+
+	lockdep_assert_held(&sched->lock);
+
+	drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id);
+
+	group = csg_slot->group;
+	if (!drm_WARN_ON(&ptdev->base, !group))
+		group->timedout = true;
+
+	sched_queue_delayed_work(sched, tick, 0);
+}
+
+static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id)
+{
+	u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events;
+	struct panthor_fw_csg_iface *csg_iface;
+	u32 ring_cs_db_mask = 0;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
+		return;
+
+	csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+	req = READ_ONCE(csg_iface->input->req);
+	ack = READ_ONCE(csg_iface->output->ack);
+	cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req);
+	cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack);
+	csg_events = (req ^ ack) & CSG_EVT_MASK;
+
+	/* There may not be any pending CSG/CS interrupts to process */
+	if (req == ack && cs_irq_req == cs_irq_ack)
+		return;
+
+	/* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before
+	 * examining the CS_ACK & CS_REQ bits. This would ensure that Host
+	 * doesn't miss an interrupt for the CS in the race scenario where
+	 * whilst Host is servicing an interrupt for the CS, firmware sends
+	 * another interrupt for that CS.
+	 */
+	csg_iface->input->cs_irq_ack = cs_irq_req;
+
+	panthor_fw_update_reqs(csg_iface, req, ack,
+			       CSG_SYNC_UPDATE |
+			       CSG_IDLE |
+			       CSG_PROGRESS_TIMER_EVENT);
+
+	if (csg_events & CSG_IDLE)
+		csg_slot_process_idle_event_locked(ptdev, csg_id);
+
+	if (csg_events & CSG_PROGRESS_TIMER_EVENT)
+		csg_slot_process_progress_timer_event_locked(ptdev, csg_id);
+
+	cs_irqs = cs_irq_req ^ cs_irq_ack;
+	while (cs_irqs) {
+		u32 cs_id = ffs(cs_irqs) - 1;
+
+		if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id))
+			ring_cs_db_mask |= BIT(cs_id);
+
+		cs_irqs &= ~BIT(cs_id);
+	}
+
+	if (csg_events & CSG_SYNC_UPDATE)
+		csg_slot_sync_update_locked(ptdev, csg_id);
+
+	if (ring_cs_db_mask)
+		panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask);
+
+	panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id));
+}
+
+static void sched_process_idle_event_locked(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	/* Acknowledge the idle event and schedule a tick. */
+	panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE);
+	sched_queue_delayed_work(ptdev->scheduler, tick, 0);
+}
+
+/**
+ * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ
+ * @ptdev: Device.
+ */
+static void sched_process_global_irq_locked(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	u32 req, ack, evts;
+
+	lockdep_assert_held(&ptdev->scheduler->lock);
+
+	req = READ_ONCE(glb_iface->input->req);
+	ack = READ_ONCE(glb_iface->output->ack);
+	evts = (req ^ ack) & GLB_EVT_MASK;
+
+	if (evts & GLB_IDLE)
+		sched_process_idle_event_locked(ptdev);
+}
+
+static void process_fw_events_work(struct work_struct *work)
+{
+	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
+						      fw_events_work);
+	u32 events = atomic_xchg(&sched->fw_events, 0);
+	struct panthor_device *ptdev = sched->ptdev;
+
+	mutex_lock(&sched->lock);
+
+	if (events & JOB_INT_GLOBAL_IF) {
+		sched_process_global_irq_locked(ptdev);
+		events &= ~JOB_INT_GLOBAL_IF;
+	}
+
+	while (events) {
+		u32 csg_id = ffs(events) - 1;
+
+		sched_process_csg_irq_locked(ptdev, csg_id);
+		events &= ~BIT(csg_id);
+	}
+
+	mutex_unlock(&sched->lock);
+}
+
+/**
+ * panthor_sched_report_fw_events() - Report FW events to the scheduler.
+ */
+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events)
+{
+	if (!ptdev->scheduler)
+		return;
+
+	atomic_or(events, &ptdev->scheduler->fw_events);
+	sched_queue_work(ptdev->scheduler, fw_events);
+}
+
+static const char *fence_get_driver_name(struct dma_fence *fence)
+{
+	return "panthor";
+}
+
+static const char *queue_fence_get_timeline_name(struct dma_fence *fence)
+{
+	return "queue-fence";
+}
+
+static const struct dma_fence_ops panthor_queue_fence_ops = {
+	.get_driver_name = fence_get_driver_name,
+	.get_timeline_name = queue_fence_get_timeline_name,
+};
+
+/**
+ */
+struct panthor_csg_slots_upd_ctx {
+	u32 update_mask;
+	u32 timedout_mask;
+	struct {
+		u32 value;
+		u32 mask;
+	} requests[MAX_CSGS];
+};
+
+static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx)
+{
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev,
+				    struct panthor_csg_slots_upd_ctx *ctx,
+				    u32 csg_id, u32 value, u32 mask)
+{
+	if (drm_WARN_ON(&ptdev->base, !mask) ||
+	    drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count))
+		return;
+
+	ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask);
+	ctx->requests[csg_id].mask |= mask;
+	ctx->update_mask |= BIT(csg_id);
+}
+
+static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev,
+				     struct panthor_csg_slots_upd_ctx *ctx)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	u32 update_slots = ctx->update_mask;
+
+	lockdep_assert_held(&sched->lock);
+
+	if (!ctx->update_mask)
+		return 0;
+
+	while (update_slots) {
+		struct panthor_fw_csg_iface *csg_iface;
+		u32 csg_id = ffs(update_slots) - 1;
+
+		update_slots &= ~BIT(csg_id);
+		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+		panthor_fw_update_reqs(csg_iface, req,
+				       ctx->requests[csg_id].value,
+				       ctx->requests[csg_id].mask);
+	}
+
+	panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask);
+
+	update_slots = ctx->update_mask;
+	while (update_slots) {
+		struct panthor_fw_csg_iface *csg_iface;
+		u32 csg_id = ffs(update_slots) - 1;
+		u32 req_mask = ctx->requests[csg_id].mask, acked;
+		int ret;
+
+		update_slots &= ~BIT(csg_id);
+		csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+
+		ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100);
+
+		if (acked & CSG_ENDPOINT_CONFIG)
+			csg_slot_sync_priority_locked(ptdev, csg_id);
+
+		if (acked & CSG_STATE_MASK)
+			csg_slot_sync_state_locked(ptdev, csg_id);
+
+		if (acked & CSG_STATUS_UPDATE) {
+			csg_slot_sync_queues_state_locked(ptdev, csg_id);
+			csg_slot_sync_idle_state_locked(ptdev, csg_id);
+		}
+
+		if (ret && acked != req_mask &&
+		    ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) {
+			drm_err(&ptdev->base, "CSG %d update request timedout", csg_id);
+			ctx->timedout_mask |= BIT(csg_id);
+		}
+	}
+
+	if (ctx->timedout_mask)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+struct panthor_sched_tick_ctx {
+	struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT];
+	struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT];
+	u32 idle_group_count;
+	u32 group_count;
+	enum panthor_csg_priority min_priority;
+	struct panthor_vm *vms[MAX_CS_PER_CSG];
+	u32 as_count;
+	bool immediate_tick;
+	u32 csg_upd_failed_mask;
+};
+
+static bool
+tick_ctx_is_full(const struct panthor_scheduler *sched,
+		 const struct panthor_sched_tick_ctx *ctx)
+{
+	return ctx->group_count == sched->csg_slot_count;
+}
+
+static bool
+group_is_idle(struct panthor_group *group)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	u32 inactive_queues;
+
+	if (group->csg_id >= 0)
+		return ptdev->scheduler->csg_slots[group->csg_id].idle;
+
+	inactive_queues = group->idle_queues | group->blocked_queues;
+	return hweight32(inactive_queues) == group->queue_count;
+}
+
+static bool
+group_can_run(struct panthor_group *group)
+{
+	return group->state != PANTHOR_CS_GROUP_TERMINATED &&
+	       !group->destroyed && group->fatal_queues == 0 &&
+	       !group->timedout;
+}
+
+static void
+tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched,
+			       struct panthor_sched_tick_ctx *ctx,
+			       struct list_head *queue,
+			       bool skip_idle_groups,
+			       bool owned_by_tick_ctx)
+{
+	struct panthor_group *group, *tmp;
+
+	if (tick_ctx_is_full(sched, ctx))
+		return;
+
+	list_for_each_entry_safe(group, tmp, queue, run_node) {
+		u32 i;
+
+		if (!group_can_run(group))
+			continue;
+
+		if (skip_idle_groups && group_is_idle(group))
+			continue;
+
+		for (i = 0; i < ctx->as_count; i++) {
+			if (ctx->vms[i] == group->vm)
+				break;
+		}
+
+		if (i == ctx->as_count && ctx->as_count == sched->as_slot_count)
+			continue;
+
+		if (!owned_by_tick_ctx)
+			group_get(group);
+
+		list_move_tail(&group->run_node, &ctx->groups[group->priority]);
+		ctx->group_count++;
+		if (group_is_idle(group))
+			ctx->idle_group_count++;
+
+		if (i == ctx->as_count)
+			ctx->vms[ctx->as_count++] = group->vm;
+
+		if (ctx->min_priority > group->priority)
+			ctx->min_priority = group->priority;
+
+		if (tick_ctx_is_full(sched, ctx))
+			return;
+	}
+}
+
+static void
+tick_ctx_insert_old_group(struct panthor_scheduler *sched,
+			  struct panthor_sched_tick_ctx *ctx,
+			  struct panthor_group *group,
+			  bool full_tick)
+{
+	struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id];
+	struct panthor_group *other_group;
+
+	if (!full_tick) {
+		list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
+		return;
+	}
+
+	/* Rotate to make sure groups with lower CSG slot
+	 * priorities have a chance to get a higher CSG slot
+	 * priority next time they get picked. This priority
+	 * has an impact on resource request ordering, so it's
+	 * important to make sure we don't let one group starve
+	 * all other groups with the same group priority.
+	 */
+	list_for_each_entry(other_group,
+			    &ctx->old_groups[csg_slot->group->priority],
+			    run_node) {
+		struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id];
+
+		if (other_csg_slot->priority > csg_slot->priority) {
+			list_add_tail(&csg_slot->group->run_node, &other_group->run_node);
+			return;
+		}
+	}
+
+	list_add_tail(&group->run_node, &ctx->old_groups[group->priority]);
+}
+
+static void
+tick_ctx_init(struct panthor_scheduler *sched,
+	      struct panthor_sched_tick_ctx *ctx,
+	      bool full_tick)
+{
+	struct panthor_device *ptdev = sched->ptdev;
+	struct panthor_csg_slots_upd_ctx upd_ctx;
+	int ret;
+	u32 i;
+
+	memset(ctx, 0, sizeof(*ctx));
+	csgs_upd_ctx_init(&upd_ctx);
+
+	ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT;
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		INIT_LIST_HEAD(&ctx->groups[i]);
+		INIT_LIST_HEAD(&ctx->old_groups[i]);
+	}
+
+	for (i = 0; i < sched->csg_slot_count; i++) {
+		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
+		struct panthor_group *group = csg_slot->group;
+		struct panthor_fw_csg_iface *csg_iface;
+
+		if (!group)
+			continue;
+
+		csg_iface = panthor_fw_get_csg_iface(ptdev, i);
+		group_get(group);
+
+		/* If there was unhandled faults on the VM, force processing of
+		 * CSG IRQs, so we can flag the faulty queue.
+		 */
+		if (panthor_vm_has_unhandled_faults(group->vm)) {
+			sched_process_csg_irq_locked(ptdev, i);
+
+			/* No fatal fault reported, flag all queues as faulty. */
+			if (!group->fatal_queues)
+				group->fatal_queues |= GENMASK(group->queue_count - 1, 0);
+		}
+
+		tick_ctx_insert_old_group(sched, ctx, group, full_tick);
+		csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
+					csg_iface->output->ack ^ CSG_STATUS_UPDATE,
+					CSG_STATUS_UPDATE);
+	}
+
+	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
+	if (ret) {
+		panthor_device_schedule_reset(ptdev);
+		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
+	}
+}
+
+#define NUM_INSTRS_PER_SLOT		16
+
+static void
+group_term_post_processing(struct panthor_group *group)
+{
+	struct panthor_job *job, *tmp;
+	LIST_HEAD(faulty_jobs);
+	bool cookie;
+	u32 i = 0;
+
+	if (drm_WARN_ON(&group->ptdev->base, group_can_run(group)))
+		return;
+
+	cookie = dma_fence_begin_signalling();
+	for (i = 0; i < group->queue_count; i++) {
+		struct panthor_queue *queue = group->queues[i];
+		struct panthor_syncobj_64b *syncobj;
+		int err;
+
+		if (group->fatal_queues & BIT(i))
+			err = -EINVAL;
+		else if (group->timedout)
+			err = -ETIMEDOUT;
+		else
+			err = -ECANCELED;
+
+		if (!queue)
+			continue;
+
+		spin_lock(&queue->fence_ctx.lock);
+		list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) {
+			list_move_tail(&job->node, &faulty_jobs);
+			dma_fence_set_error(job->done_fence, err);
+			dma_fence_signal_locked(job->done_fence);
+		}
+		spin_unlock(&queue->fence_ctx.lock);
+
+		/* Manually update the syncobj seqno to unblock waiters. */
+		syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj));
+		syncobj->status = ~0;
+		syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno);
+		sched_queue_work(group->ptdev->scheduler, sync_upd);
+	}
+	dma_fence_end_signalling(cookie);
+
+	list_for_each_entry_safe(job, tmp, &faulty_jobs, node) {
+		list_del_init(&job->node);
+		panthor_job_put(&job->base);
+	}
+}
+
+static void group_term_work(struct work_struct *work)
+{
+	struct panthor_group *group =
+		container_of(work, struct panthor_group, term_work);
+
+	group_term_post_processing(group);
+	group_put(group);
+}
+
+static void
+tick_ctx_cleanup(struct panthor_scheduler *sched,
+		 struct panthor_sched_tick_ctx *ctx)
+{
+	struct panthor_group *group, *tmp;
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) {
+		list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) {
+			/* If everything went fine, we should only have groups
+			 * to be terminated in the old_groups lists.
+			 */
+			drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask &&
+				    group_can_run(group));
+
+			if (!group_can_run(group)) {
+				list_del_init(&group->run_node);
+				list_del_init(&group->wait_node);
+				group_queue_work(group, term);
+			} else if (group->csg_id >= 0) {
+				list_del_init(&group->run_node);
+			} else {
+				list_move(&group->run_node,
+					  group_is_idle(group) ?
+					  &sched->groups.idle[group->priority] :
+					  &sched->groups.runnable[group->priority]);
+			}
+			group_put(group);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) {
+		/* If everything went fine, the groups to schedule lists should
+		 * be empty.
+		 */
+		drm_WARN_ON(&group->ptdev->base,
+			    !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i]));
+
+		list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) {
+			if (group->csg_id >= 0) {
+				list_del_init(&group->run_node);
+			} else {
+				list_move(&group->run_node,
+					  group_is_idle(group) ?
+					  &sched->groups.idle[group->priority] :
+					  &sched->groups.runnable[group->priority]);
+			}
+			group_put(group);
+		}
+	}
+}
+
+static void
+tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx)
+{
+	struct panthor_group *group, *tmp;
+	struct panthor_device *ptdev = sched->ptdev;
+	struct panthor_csg_slot *csg_slot;
+	int prio, new_csg_prio = MAX_CSG_PRIO, i;
+	u32 csg_mod_mask = 0, free_csg_slots = 0;
+	struct panthor_csg_slots_upd_ctx upd_ctx;
+	int ret;
+
+	csgs_upd_ctx_init(&upd_ctx);
+
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		/* Suspend or terminate evicted groups. */
+		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
+			bool term = !group_can_run(group);
+			int csg_id = group->csg_id;
+
+			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
+				continue;
+
+			csg_slot = &sched->csg_slots[csg_id];
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+						term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND,
+						CSG_STATE_MASK);
+		}
+
+		/* Update priorities on already running groups. */
+		list_for_each_entry(group, &ctx->groups[prio], run_node) {
+			struct panthor_fw_csg_iface *csg_iface;
+			int csg_id = group->csg_id;
+
+			if (csg_id < 0) {
+				new_csg_prio--;
+				continue;
+			}
+
+			csg_slot = &sched->csg_slots[csg_id];
+			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+			if (csg_slot->priority == new_csg_prio) {
+				new_csg_prio--;
+				continue;
+			}
+
+			panthor_fw_update_reqs(csg_iface, endpoint_req,
+					       CSG_EP_REQ_PRIORITY(new_csg_prio),
+					       CSG_EP_REQ_PRIORITY_MASK);
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
+						CSG_ENDPOINT_CONFIG);
+			new_csg_prio--;
+		}
+	}
+
+	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
+	if (ret) {
+		panthor_device_schedule_reset(ptdev);
+		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
+		return;
+	}
+
+	/* Unbind evicted groups. */
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		list_for_each_entry(group, &ctx->old_groups[prio], run_node) {
+			/* This group is gone. Process interrupts to clear
+			 * any pending interrupts before we start the new
+			 * group.
+			 */
+			if (group->csg_id >= 0)
+				sched_process_csg_irq_locked(ptdev, group->csg_id);
+
+			group_unbind_locked(group);
+		}
+	}
+
+	for (i = 0; i < sched->csg_slot_count; i++) {
+		if (!sched->csg_slots[i].group)
+			free_csg_slots |= BIT(i);
+	}
+
+	csgs_upd_ctx_init(&upd_ctx);
+	new_csg_prio = MAX_CSG_PRIO;
+
+	/* Start new groups. */
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		list_for_each_entry(group, &ctx->groups[prio], run_node) {
+			int csg_id = group->csg_id;
+			struct panthor_fw_csg_iface *csg_iface;
+
+			if (csg_id >= 0) {
+				new_csg_prio--;
+				continue;
+			}
+
+			csg_id = ffs(free_csg_slots) - 1;
+			if (drm_WARN_ON(&ptdev->base, csg_id < 0))
+				break;
+
+			csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
+			csg_slot = &sched->csg_slots[csg_id];
+			csg_mod_mask |= BIT(csg_id);
+			group_bind_locked(group, csg_id);
+			csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--);
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+						group->state == PANTHOR_CS_GROUP_SUSPENDED ?
+						CSG_STATE_RESUME : CSG_STATE_START,
+						CSG_STATE_MASK);
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+						csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG,
+						CSG_ENDPOINT_CONFIG);
+			free_csg_slots &= ~BIT(csg_id);
+		}
+	}
+
+	ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
+	if (ret) {
+		panthor_device_schedule_reset(ptdev);
+		ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask;
+		return;
+	}
+
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) {
+			list_del_init(&group->run_node);
+
+			/* If the group has been destroyed while we were
+			 * scheduling, ask for an immediate tick to
+			 * re-evaluate as soon as possible and get rid of
+			 * this dangling group.
+			 */
+			if (group->destroyed)
+				ctx->immediate_tick = true;
+			group_put(group);
+		}
+
+		/* Return evicted groups to the idle or run queues. Groups
+		 * that can no longer be run (because they've been destroyed
+		 * or experienced an unrecoverable error) will be scheduled
+		 * for destruction in tick_ctx_cleanup().
+		 */
+		list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) {
+			if (!group_can_run(group))
+				continue;
+
+			if (group_is_idle(group))
+				list_move_tail(&group->run_node, &sched->groups.idle[prio]);
+			else
+				list_move_tail(&group->run_node, &sched->groups.runnable[prio]);
+			group_put(group);
+		}
+	}
+
+	sched->used_csg_slot_count = ctx->group_count;
+	sched->might_have_idle_groups = ctx->idle_group_count > 0;
+}
+
+static u64
+tick_ctx_update_resched_target(struct panthor_scheduler *sched,
+			       const struct panthor_sched_tick_ctx *ctx)
+{
+	/* We had space left, no need to reschedule until some external event happens. */
+	if (!tick_ctx_is_full(sched, ctx))
+		goto no_tick;
+
+	/* If idle groups were scheduled, no need to wake up until some external
+	 * event happens (group unblocked, new job submitted, ...).
+	 */
+	if (ctx->idle_group_count)
+		goto no_tick;
+
+	if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT))
+		goto no_tick;
+
+	/* If there are groups of the same priority waiting, we need to
+	 * keep the scheduler ticking, otherwise, we'll just wait for
+	 * new groups with higher priority to be queued.
+	 */
+	if (!list_empty(&sched->groups.runnable[ctx->min_priority])) {
+		u64 resched_target = sched->last_tick + sched->tick_period;
+
+		if (time_before64(sched->resched_target, sched->last_tick) ||
+		    time_before64(resched_target, sched->resched_target))
+			sched->resched_target = resched_target;
+
+		return sched->resched_target - sched->last_tick;
+	}
+
+no_tick:
+	sched->resched_target = U64_MAX;
+	return U64_MAX;
+}
+
+static void tick_work(struct work_struct *work)
+{
+	struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler,
+						      tick_work.work);
+	struct panthor_device *ptdev = sched->ptdev;
+	struct panthor_sched_tick_ctx ctx;
+	u64 remaining_jiffies = 0, resched_delay;
+	u64 now = get_jiffies_64();
+	int prio, ret, cookie;
+
+	if (!drm_dev_enter(&ptdev->base, &cookie))
+		return;
+
+	ret = pm_runtime_resume_and_get(ptdev->base.dev);
+	if (drm_WARN_ON(&ptdev->base, ret))
+		goto out_dev_exit;
+
+	if (time_before64(now, sched->resched_target))
+		remaining_jiffies = sched->resched_target - now;
+
+	mutex_lock(&sched->lock);
+	if (panthor_device_reset_is_pending(sched->ptdev))
+		goto out_unlock;
+
+	tick_ctx_init(sched, &ctx, remaining_jiffies != 0);
+	if (ctx.csg_upd_failed_mask)
+		goto out_cleanup_ctx;
+
+	if (remaining_jiffies) {
+		/* Scheduling forced in the middle of a tick. Only RT groups
+		 * can preempt non-RT ones. Currently running RT groups can't be
+		 * preempted.
+		 */
+		for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
+		     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
+		     prio--) {
+			tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio],
+						       true, true);
+			if (prio == PANTHOR_CSG_PRIORITY_RT) {
+				tick_ctx_pick_groups_from_list(sched, &ctx,
+							       &sched->groups.runnable[prio],
+							       true, false);
+			}
+		}
+	}
+
+	/* First pick non-idle groups */
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
+	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
+	     prio--) {
+		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio],
+					       true, false);
+		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true);
+	}
+
+	/* If we have free CSG slots left, pick idle groups */
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1;
+	     prio >= 0 && !tick_ctx_is_full(sched, &ctx);
+	     prio--) {
+		/* Check the old_group queue first to avoid reprogramming the slots */
+		tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true);
+		tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio],
+					       false, false);
+	}
+
+	tick_ctx_apply(sched, &ctx);
+	if (ctx.csg_upd_failed_mask)
+		goto out_cleanup_ctx;
+
+	if (ctx.idle_group_count == ctx.group_count) {
+		panthor_devfreq_record_idle(sched->ptdev);
+		if (sched->pm.has_ref) {
+			pm_runtime_put_autosuspend(ptdev->base.dev);
+			sched->pm.has_ref = false;
+		}
+	} else {
+		panthor_devfreq_record_busy(sched->ptdev);
+		if (!sched->pm.has_ref) {
+			pm_runtime_get(ptdev->base.dev);
+			sched->pm.has_ref = true;
+		}
+	}
+
+	sched->last_tick = now;
+	resched_delay = tick_ctx_update_resched_target(sched, &ctx);
+	if (ctx.immediate_tick)
+		resched_delay = 0;
+
+	if (resched_delay != U64_MAX)
+		sched_queue_delayed_work(sched, tick, resched_delay);
+
+out_cleanup_ctx:
+	tick_ctx_cleanup(sched, &ctx);
+
+out_unlock:
+	mutex_unlock(&sched->lock);
+	pm_runtime_mark_last_busy(ptdev->base.dev);
+	pm_runtime_put_autosuspend(ptdev->base.dev);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+}
+
+static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx)
+{
+	struct panthor_queue *queue = group->queues[queue_idx];
+	union {
+		struct panthor_syncobj_64b sync64;
+		struct panthor_syncobj_32b sync32;
+	} *syncobj;
+	bool result;
+	u64 value;
+
+	syncobj = panthor_queue_get_syncwait_obj(group, queue);
+	if (!syncobj)
+		return -EINVAL;
+
+	value = queue->syncwait.sync64 ?
+		syncobj->sync64.seqno :
+		syncobj->sync32.seqno;
+
+	if (queue->syncwait.gt)
+		result = value > queue->syncwait.ref;
+	else
+		result = value <= queue->syncwait.ref;
+
+	if (result)
+		panthor_queue_put_syncwait_obj(queue);
+
+	return result;
+}
+
+static void sync_upd_work(struct work_struct *work)
+{
+	struct panthor_scheduler *sched = container_of(work,
+						      struct panthor_scheduler,
+						      sync_upd_work);
+	struct panthor_group *group, *tmp;
+	bool immediate_tick = false;
+
+	mutex_lock(&sched->lock);
+	list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) {
+		u32 tested_queues = group->blocked_queues;
+		u32 unblocked_queues = 0;
+
+		while (tested_queues) {
+			u32 cs_id = ffs(tested_queues) - 1;
+			int ret;
+
+			ret = panthor_queue_eval_syncwait(group, cs_id);
+			drm_WARN_ON(&group->ptdev->base, ret < 0);
+			if (ret)
+				unblocked_queues |= BIT(cs_id);
+
+			tested_queues &= ~BIT(cs_id);
+		}
+
+		if (unblocked_queues) {
+			group->blocked_queues &= ~unblocked_queues;
+
+			if (group->csg_id < 0) {
+				list_move(&group->run_node,
+					  &sched->groups.runnable[group->priority]);
+				if (group->priority == PANTHOR_CSG_PRIORITY_RT)
+					immediate_tick = true;
+			}
+		}
+
+		if (!group->blocked_queues)
+			list_del_init(&group->wait_node);
+	}
+	mutex_unlock(&sched->lock);
+
+	if (immediate_tick)
+		sched_queue_delayed_work(sched, tick, 0);
+}
+
+static void group_schedule_locked(struct panthor_group *group, u32 queue_mask)
+{
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct list_head *queue = &sched->groups.runnable[group->priority];
+	u64 delay_jiffies = 0;
+	bool was_idle;
+	u64 now;
+
+	if (!group_can_run(group))
+		return;
+
+	/* All updated queues are blocked, no need to wake up the scheduler. */
+	if ((queue_mask & group->blocked_queues) == queue_mask)
+		return;
+
+	was_idle = group_is_idle(group);
+	group->idle_queues &= ~queue_mask;
+
+	/* Don't mess up with the lists if we're in a middle of a reset. */
+	if (atomic_read(&sched->reset.in_progress))
+		return;
+
+	if (was_idle && !group_is_idle(group))
+		list_move_tail(&group->run_node, queue);
+
+	/* RT groups are preemptive. */
+	if (group->priority == PANTHOR_CSG_PRIORITY_RT) {
+		sched_queue_delayed_work(sched, tick, 0);
+		return;
+	}
+
+	/* Some groups might be idle, force an immediate tick to
+	 * re-evaluate.
+	 */
+	if (sched->might_have_idle_groups) {
+		sched_queue_delayed_work(sched, tick, 0);
+		return;
+	}
+
+	/* Scheduler is ticking, nothing to do. */
+	if (sched->resched_target != U64_MAX) {
+		/* If there are free slots, force immediating ticking. */
+		if (sched->used_csg_slot_count < sched->csg_slot_count)
+			sched_queue_delayed_work(sched, tick, 0);
+
+		return;
+	}
+
+	/* Scheduler tick was off, recalculate the resched_target based on the
+	 * last tick event, and queue the scheduler work.
+	 */
+	now = get_jiffies_64();
+	sched->resched_target = sched->last_tick + sched->tick_period;
+	if (sched->used_csg_slot_count == sched->csg_slot_count &&
+	    time_before64(now, sched->resched_target))
+		delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX);
+
+	sched_queue_delayed_work(sched, tick, delay_jiffies);
+}
+
+static void queue_stop(struct panthor_queue *queue,
+		       struct panthor_job *bad_job)
+{
+	drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL);
+}
+
+static void queue_start(struct panthor_queue *queue)
+{
+	struct panthor_job *job;
+
+	/* Re-assign the parent fences. */
+	list_for_each_entry(job, &queue->scheduler.pending_list, base.list)
+		job->base.s_fence->parent = dma_fence_get(job->done_fence);
+
+	drm_sched_start(&queue->scheduler, true);
+}
+
+static void panthor_group_stop(struct panthor_group *group)
+{
+	struct panthor_scheduler *sched = group->ptdev->scheduler;
+
+	lockdep_assert_held(&sched->reset.lock);
+
+	for (u32 i = 0; i < group->queue_count; i++)
+		queue_stop(group->queues[i], NULL);
+
+	group_get(group);
+	list_move_tail(&group->run_node, &sched->reset.stopped_groups);
+}
+
+static void panthor_group_start(struct panthor_group *group)
+{
+	struct panthor_scheduler *sched = group->ptdev->scheduler;
+
+	lockdep_assert_held(&group->ptdev->scheduler->reset.lock);
+
+	for (u32 i = 0; i < group->queue_count; i++)
+		queue_start(group->queues[i]);
+
+	if (group_can_run(group)) {
+		list_move_tail(&group->run_node,
+			       group_is_idle(group) ?
+			       &sched->groups.idle[group->priority] :
+			       &sched->groups.runnable[group->priority]);
+	} else {
+		list_del_init(&group->run_node);
+		list_del_init(&group->wait_node);
+		group_queue_work(group, term);
+	}
+
+	group_put(group);
+}
+
+static void panthor_sched_immediate_tick(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+
+	sched_queue_delayed_work(sched, tick, 0);
+}
+
+/**
+ * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler.
+ */
+void panthor_sched_report_mmu_fault(struct panthor_device *ptdev)
+{
+	/* Force a tick to immediately kill faulty groups. */
+	if (ptdev->scheduler)
+		panthor_sched_immediate_tick(ptdev);
+}
+
+void panthor_sched_resume(struct panthor_device *ptdev)
+{
+	/* Force a tick to re-evaluate after a resume. */
+	panthor_sched_immediate_tick(ptdev);
+}
+
+void panthor_sched_suspend(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_csg_slots_upd_ctx upd_ctx;
+	u64 suspended_slots, faulty_slots;
+	struct panthor_group *group;
+	u32 i;
+
+	mutex_lock(&sched->lock);
+	csgs_upd_ctx_init(&upd_ctx);
+	for (i = 0; i < sched->csg_slot_count; i++) {
+		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
+
+		if (csg_slot->group) {
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i,
+						CSG_STATE_SUSPEND,
+						CSG_STATE_MASK);
+		}
+	}
+
+	suspended_slots = upd_ctx.update_mask;
+
+	csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
+	suspended_slots &= ~upd_ctx.timedout_mask;
+	faulty_slots = upd_ctx.timedout_mask;
+
+	if (faulty_slots) {
+		u32 slot_mask = faulty_slots;
+
+		drm_err(&ptdev->base, "CSG suspend failed, escalating to termination");
+		csgs_upd_ctx_init(&upd_ctx);
+		while (slot_mask) {
+			u32 csg_id = ffs(slot_mask) - 1;
+
+			csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id,
+						CSG_STATE_TERMINATE,
+						CSG_STATE_MASK);
+			slot_mask &= ~BIT(csg_id);
+		}
+
+		csgs_upd_ctx_apply_locked(ptdev, &upd_ctx);
+
+		slot_mask = upd_ctx.timedout_mask;
+		while (slot_mask) {
+			u32 csg_id = ffs(slot_mask) - 1;
+			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+
+			/* Terminate command timedout, but the soft-reset will
+			 * automatically terminate all active groups, so let's
+			 * force the state to halted here.
+			 */
+			if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED)
+				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
+			slot_mask &= ~BIT(csg_id);
+		}
+	}
+
+	/* Flush L2 and LSC caches to make sure suspend state is up-to-date.
+	 * If the flush fails, flag all queues for termination.
+	 */
+	if (suspended_slots) {
+		bool flush_caches_failed = false;
+		u32 slot_mask = suspended_slots;
+
+		if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0))
+			flush_caches_failed = true;
+
+		while (slot_mask) {
+			u32 csg_id = ffs(slot_mask) - 1;
+			struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id];
+
+			if (flush_caches_failed)
+				csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED;
+			else
+				csg_slot_sync_update_locked(ptdev, csg_id);
+
+			slot_mask &= ~BIT(csg_id);
+		}
+
+		if (flush_caches_failed)
+			faulty_slots |= suspended_slots;
+	}
+
+	for (i = 0; i < sched->csg_slot_count; i++) {
+		struct panthor_csg_slot *csg_slot = &sched->csg_slots[i];
+
+		group = csg_slot->group;
+		if (!group)
+			continue;
+
+		group_get(group);
+
+		if (group->csg_id >= 0)
+			sched_process_csg_irq_locked(ptdev, group->csg_id);
+
+		group_unbind_locked(group);
+
+		drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node));
+
+		if (group_can_run(group)) {
+			list_add(&group->run_node,
+				 &sched->groups.idle[group->priority]);
+		} else {
+			/* We don't bother stopping the scheduler if the group is
+			 * faulty, the group termination work will finish the job.
+			 */
+			list_del_init(&group->wait_node);
+			group_queue_work(group, term);
+		}
+		group_put(group);
+	}
+	mutex_unlock(&sched->lock);
+}
+
+void panthor_sched_pre_reset(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_group *group, *group_tmp;
+	u32 i;
+
+	mutex_lock(&sched->reset.lock);
+	atomic_set(&sched->reset.in_progress, true);
+
+	/* Cancel all scheduler works. Once this is done, these works can't be
+	 * scheduled again until the reset operation is complete.
+	 */
+	cancel_work_sync(&sched->sync_upd_work);
+	cancel_delayed_work_sync(&sched->tick_work);
+
+	panthor_sched_suspend(ptdev);
+
+	/* Stop all groups that might still accept jobs, so we don't get passed
+	 * new jobs while we're resetting.
+	 */
+	for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) {
+		/* All groups should be in the idle lists. */
+		drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i]));
+		list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node)
+			panthor_group_stop(group);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) {
+		list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node)
+			panthor_group_stop(group);
+	}
+
+	mutex_unlock(&sched->reset.lock);
+}
+
+void panthor_sched_post_reset(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_group *group, *group_tmp;
+
+	mutex_lock(&sched->reset.lock);
+
+	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
+		panthor_group_start(group);
+
+	/* We're done resetting the GPU, clear the reset.in_progress bit so we can
+	 * kick the scheduler.
+	 */
+	atomic_set(&sched->reset.in_progress, false);
+	mutex_unlock(&sched->reset.lock);
+
+	sched_queue_delayed_work(sched, tick, 0);
+
+	sched_queue_work(sched, sync_upd);
+}
+
+static void group_sync_upd_work(struct work_struct *work)
+{
+	struct panthor_group *group =
+		container_of(work, struct panthor_group, sync_upd_work);
+	struct panthor_job *job, *job_tmp;
+	LIST_HEAD(done_jobs);
+	u32 queue_idx;
+	bool cookie;
+
+	cookie = dma_fence_begin_signalling();
+	for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) {
+		struct panthor_queue *queue = group->queues[queue_idx];
+		struct panthor_syncobj_64b *syncobj;
+
+		if (!queue)
+			continue;
+
+		syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj));
+
+		spin_lock(&queue->fence_ctx.lock);
+		list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) {
+			if (!job->call_info.size)
+				continue;
+
+			if (syncobj->seqno < job->done_fence->seqno)
+				break;
+
+			list_move_tail(&job->node, &done_jobs);
+			dma_fence_signal_locked(job->done_fence);
+		}
+		spin_unlock(&queue->fence_ctx.lock);
+	}
+	dma_fence_end_signalling(cookie);
+
+	list_for_each_entry_safe(job, job_tmp, &done_jobs, node) {
+		list_del_init(&job->node);
+		panthor_job_put(&job->base);
+	}
+
+	group_put(group);
+}
+
+static struct dma_fence *
+queue_run_job(struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+	struct panthor_group *group = job->group;
+	struct panthor_queue *queue = group->queues[job->queue_idx];
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
+	u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1);
+	u64 addr_reg = ptdev->csif_info.cs_reg_count -
+		       ptdev->csif_info.unpreserved_cs_reg_count;
+	u64 val_reg = addr_reg + 2;
+	u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
+			job->queue_idx * sizeof(struct panthor_syncobj_64b);
+	u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0);
+	struct dma_fence *done_fence;
+	int ret;
+
+	u64 call_instrs[NUM_INSTRS_PER_SLOT] = {
+		/* MOV32 rX+2, cs.latest_flush */
+		(2ull << 56) | (val_reg << 48) | job->call_info.latest_flush,
+
+		/* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */
+		(36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233,
+
+		/* MOV48 rX:rX+1, cs.start */
+		(1ull << 56) | (addr_reg << 48) | job->call_info.start,
+
+		/* MOV32 rX+2, cs.size */
+		(2ull << 56) | (val_reg << 48) | job->call_info.size,
+
+		/* WAIT(0) => waits for FLUSH_CACHE2 instruction */
+		(3ull << 56) | (1 << 16),
+
+		/* CALL rX:rX+1, rX+2 */
+		(32ull << 56) | (addr_reg << 40) | (val_reg << 32),
+
+		/* MOV48 rX:rX+1, sync_addr */
+		(1ull << 56) | (addr_reg << 48) | sync_addr,
+
+		/* MOV48 rX+2, #1 */
+		(1ull << 56) | (val_reg << 48) | 1,
+
+		/* WAIT(all) */
+		(3ull << 56) | (waitall_mask << 16),
+
+		/* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/
+		(51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1,
+
+		/* ERROR_BARRIER, so we can recover from faults at job
+		 * boundaries.
+		 */
+		(47ull << 56),
+	};
+
+	/* Need to be cacheline aligned to please the prefetcher. */
+	static_assert(sizeof(call_instrs) % 64 == 0,
+		      "call_instrs is not aligned on a cacheline");
+
+	/* Stream size is zero, nothing to do => return a NULL fence and let
+	 * drm_sched signal the parent.
+	 */
+	if (!job->call_info.size)
+		return NULL;
+
+	ret = pm_runtime_resume_and_get(ptdev->base.dev);
+	if (drm_WARN_ON(&ptdev->base, ret))
+		return ERR_PTR(ret);
+
+	mutex_lock(&sched->lock);
+	if (!group_can_run(group)) {
+		done_fence = ERR_PTR(-ECANCELED);
+		goto out_unlock;
+	}
+
+	dma_fence_init(job->done_fence,
+		       &panthor_queue_fence_ops,
+		       &queue->fence_ctx.lock,
+		       queue->fence_ctx.id,
+		       atomic64_inc_return(&queue->fence_ctx.seqno));
+
+	memcpy(queue->ringbuf->kmap + ringbuf_insert,
+	       call_instrs, sizeof(call_instrs));
+
+	panthor_job_get(&job->base);
+	spin_lock(&queue->fence_ctx.lock);
+	list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
+	spin_unlock(&queue->fence_ctx.lock);
+
+	job->ringbuf.start = queue->iface.input->insert;
+	job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs);
+
+	/* Make sure the ring buffer is updated before the INSERT
+	 * register.
+	 */
+	wmb();
+
+	queue->iface.input->extract = queue->iface.output->extract;
+	queue->iface.input->insert = job->ringbuf.end;
+
+	if (group->csg_id < 0) {
+		/* If the queue is blocked, we want to keep the timeout running, so we
+		 * can detect unbounded waits and kill the group when that happens.
+		 * Otherwise, we suspend the timeout so the time we spend waiting for
+		 * a CSG slot is not counted.
+		 */
+		if (!(group->blocked_queues & BIT(job->queue_idx)) &&
+		    !queue->timeout_suspended) {
+			queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler);
+			queue->timeout_suspended = true;
+		}
+
+		group_schedule_locked(group, BIT(job->queue_idx));
+	} else {
+		gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
+		if (!sched->pm.has_ref &&
+		    !(group->blocked_queues & BIT(job->queue_idx))) {
+			pm_runtime_get(ptdev->base.dev);
+			sched->pm.has_ref = true;
+		}
+	}
+
+	done_fence = dma_fence_get(job->done_fence);
+
+out_unlock:
+	mutex_unlock(&sched->lock);
+	pm_runtime_mark_last_busy(ptdev->base.dev);
+	pm_runtime_put_autosuspend(ptdev->base.dev);
+
+	return done_fence;
+}
+
+static enum drm_gpu_sched_stat
+queue_timedout_job(struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+	struct panthor_group *group = job->group;
+	struct panthor_device *ptdev = group->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_queue *queue = group->queues[job->queue_idx];
+
+	drm_warn(&ptdev->base, "job timeout\n");
+
+	drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress));
+
+	queue_stop(queue, job);
+
+	mutex_lock(&sched->lock);
+	group->timedout = true;
+	if (group->csg_id >= 0) {
+		sched_queue_delayed_work(ptdev->scheduler, tick, 0);
+	} else {
+		/* Remove from the run queues, so the scheduler can't
+		 * pick the group on the next tick.
+		 */
+		list_del_init(&group->run_node);
+		list_del_init(&group->wait_node);
+
+		group_queue_work(group, term);
+	}
+	mutex_unlock(&sched->lock);
+
+	queue_start(queue);
+
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+static void queue_free_job(struct drm_sched_job *sched_job)
+{
+	drm_sched_job_cleanup(sched_job);
+	panthor_job_put(sched_job);
+}
+
+static const struct drm_sched_backend_ops panthor_queue_sched_ops = {
+	.run_job = queue_run_job,
+	.timedout_job = queue_timedout_job,
+	.free_job = queue_free_job,
+};
+
+static struct panthor_queue *
+group_create_queue(struct panthor_group *group,
+		   const struct drm_panthor_queue_create *args)
+{
+	struct drm_gpu_scheduler *drm_sched;
+	struct panthor_queue *queue;
+	int ret;
+
+	if (args->pad[0] || args->pad[1] || args->pad[2])
+		return ERR_PTR(-EINVAL);
+
+	if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K ||
+	    !is_power_of_2(args->ringbuf_size))
+		return ERR_PTR(-EINVAL);
+
+	if (args->priority > CSF_MAX_QUEUE_PRIO)
+		return ERR_PTR(-EINVAL);
+
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return ERR_PTR(-ENOMEM);
+
+	queue->fence_ctx.id = dma_fence_context_alloc(1);
+	spin_lock_init(&queue->fence_ctx.lock);
+	INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
+
+	queue->priority = args->priority;
+
+	queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
+						  args->ringbuf_size,
+						  DRM_PANTHOR_BO_NO_MMAP,
+						  DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+						  DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+						  PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(queue->ringbuf)) {
+		ret = PTR_ERR(queue->ringbuf);
+		goto err_free_queue;
+	}
+
+	ret = panthor_kernel_bo_vmap(queue->ringbuf);
+	if (ret)
+		goto err_free_queue;
+
+	queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
+							    &queue->iface.input,
+							    &queue->iface.output,
+							    &queue->iface.input_fw_va,
+							    &queue->iface.output_fw_va);
+	if (IS_ERR(queue->iface.mem)) {
+		ret = PTR_ERR(queue->iface.mem);
+		goto err_free_queue;
+	}
+
+	ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops,
+			     group->ptdev->scheduler->wq, 1,
+			     args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)),
+			     0, msecs_to_jiffies(JOB_TIMEOUT_MS),
+			     group->ptdev->reset.wq,
+			     NULL, "panthor-queue", group->ptdev->base.dev);
+	if (ret)
+		goto err_free_queue;
+
+	drm_sched = &queue->scheduler;
+	ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
+
+	return queue;
+
+err_free_queue:
+	group_free_queue(group, queue);
+	return ERR_PTR(ret);
+}
+
+#define MAX_GROUPS_PER_POOL		128
+
+int panthor_group_create(struct panthor_file *pfile,
+			 const struct drm_panthor_group_create *group_args,
+			 const struct drm_panthor_queue_create *queue_args)
+{
+	struct panthor_device *ptdev = pfile->ptdev;
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
+	struct panthor_group *group = NULL;
+	u32 gid, i, suspend_size;
+	int ret;
+
+	if (group_args->pad)
+		return -EINVAL;
+
+	if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH)
+		return -EINVAL;
+
+	if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) ||
+	    (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) ||
+	    (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present))
+		return -EINVAL;
+
+	if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores ||
+	    hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores ||
+	    hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores)
+		return -EINVAL;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	spin_lock_init(&group->fatal_lock);
+	kref_init(&group->refcount);
+	group->state = PANTHOR_CS_GROUP_CREATED;
+	group->csg_id = -1;
+
+	group->ptdev = ptdev;
+	group->max_compute_cores = group_args->max_compute_cores;
+	group->compute_core_mask = group_args->compute_core_mask;
+	group->max_fragment_cores = group_args->max_fragment_cores;
+	group->fragment_core_mask = group_args->fragment_core_mask;
+	group->max_tiler_cores = group_args->max_tiler_cores;
+	group->tiler_core_mask = group_args->tiler_core_mask;
+	group->priority = group_args->priority;
+
+	INIT_LIST_HEAD(&group->wait_node);
+	INIT_LIST_HEAD(&group->run_node);
+	INIT_WORK(&group->term_work, group_term_work);
+	INIT_WORK(&group->sync_upd_work, group_sync_upd_work);
+	INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work);
+	INIT_WORK(&group->release_work, group_release_work);
+
+	group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
+	if (!group->vm) {
+		ret = -EINVAL;
+		goto err_put_group;
+	}
+
+	suspend_size = csg_iface->control->suspend_size;
+	group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
+	if (IS_ERR(group->suspend_buf)) {
+		ret = PTR_ERR(group->suspend_buf);
+		group->suspend_buf = NULL;
+		goto err_put_group;
+	}
+
+	suspend_size = csg_iface->control->protm_suspend_size;
+	group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
+	if (IS_ERR(group->protm_suspend_buf)) {
+		ret = PTR_ERR(group->protm_suspend_buf);
+		group->protm_suspend_buf = NULL;
+		goto err_put_group;
+	}
+
+	group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
+						   group_args->queues.count *
+						   sizeof(struct panthor_syncobj_64b),
+						   DRM_PANTHOR_BO_NO_MMAP,
+						   DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
+						   DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
+						   PANTHOR_VM_KERNEL_AUTO_VA);
+	if (IS_ERR(group->syncobjs)) {
+		ret = PTR_ERR(group->syncobjs);
+		goto err_put_group;
+	}
+
+	ret = panthor_kernel_bo_vmap(group->syncobjs);
+	if (ret)
+		goto err_put_group;
+
+	memset(group->syncobjs->kmap, 0,
+	       group_args->queues.count * sizeof(struct panthor_syncobj_64b));
+
+	for (i = 0; i < group_args->queues.count; i++) {
+		group->queues[i] = group_create_queue(group, &queue_args[i]);
+		if (IS_ERR(group->queues[i])) {
+			ret = PTR_ERR(group->queues[i]);
+			group->queues[i] = NULL;
+			goto err_put_group;
+		}
+
+		group->queue_count++;
+	}
+
+	group->idle_queues = GENMASK(group->queue_count - 1, 0);
+
+	ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
+	if (ret)
+		goto err_put_group;
+
+	mutex_lock(&sched->reset.lock);
+	if (atomic_read(&sched->reset.in_progress)) {
+		panthor_group_stop(group);
+	} else {
+		mutex_lock(&sched->lock);
+		list_add_tail(&group->run_node,
+			      &sched->groups.idle[group->priority]);
+		mutex_unlock(&sched->lock);
+	}
+	mutex_unlock(&sched->reset.lock);
+
+	return gid;
+
+err_put_group:
+	group_put(group);
+	return ret;
+}
+
+int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle)
+{
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_device *ptdev = pfile->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_group *group;
+
+	group = xa_erase(&gpool->xa, group_handle);
+	if (!group)
+		return -EINVAL;
+
+	for (u32 i = 0; i < group->queue_count; i++) {
+		if (group->queues[i])
+			drm_sched_entity_destroy(&group->queues[i]->entity);
+	}
+
+	mutex_lock(&sched->reset.lock);
+	mutex_lock(&sched->lock);
+	group->destroyed = true;
+	if (group->csg_id >= 0) {
+		sched_queue_delayed_work(sched, tick, 0);
+	} else if (!atomic_read(&sched->reset.in_progress)) {
+		/* Remove from the run queues, so the scheduler can't
+		 * pick the group on the next tick.
+		 */
+		list_del_init(&group->run_node);
+		list_del_init(&group->wait_node);
+		group_queue_work(group, term);
+	}
+	mutex_unlock(&sched->lock);
+	mutex_unlock(&sched->reset.lock);
+
+	group_put(group);
+	return 0;
+}
+
+int panthor_group_get_state(struct panthor_file *pfile,
+			    struct drm_panthor_group_get_state *get_state)
+{
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_device *ptdev = pfile->ptdev;
+	struct panthor_scheduler *sched = ptdev->scheduler;
+	struct panthor_group *group;
+
+	if (get_state->pad)
+		return -EINVAL;
+
+	group = group_get(xa_load(&gpool->xa, get_state->group_handle));
+	if (!group)
+		return -EINVAL;
+
+	memset(get_state, 0, sizeof(*get_state));
+
+	mutex_lock(&sched->lock);
+	if (group->timedout)
+		get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT;
+	if (group->fatal_queues) {
+		get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT;
+		get_state->fatal_queues = group->fatal_queues;
+	}
+	mutex_unlock(&sched->lock);
+
+	group_put(group);
+	return 0;
+}
+
+int panthor_group_pool_create(struct panthor_file *pfile)
+{
+	struct panthor_group_pool *gpool;
+
+	gpool = kzalloc(sizeof(*gpool), GFP_KERNEL);
+	if (!gpool)
+		return -ENOMEM;
+
+	xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
+	pfile->groups = gpool;
+	return 0;
+}
+
+void panthor_group_pool_destroy(struct panthor_file *pfile)
+{
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_group *group;
+	unsigned long i;
+
+	if (IS_ERR_OR_NULL(gpool))
+		return;
+
+	xa_for_each(&gpool->xa, i, group)
+		panthor_group_destroy(pfile, i);
+
+	xa_destroy(&gpool->xa);
+	kfree(gpool);
+	pfile->groups = NULL;
+}
+
+static void job_release(struct kref *ref)
+{
+	struct panthor_job *job = container_of(ref, struct panthor_job, refcount);
+
+	drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node));
+
+	if (job->base.s_fence)
+		drm_sched_job_cleanup(&job->base);
+
+	if (job->done_fence && job->done_fence->ops)
+		dma_fence_put(job->done_fence);
+	else
+		dma_fence_free(job->done_fence);
+
+	group_put(job->group);
+
+	kfree(job);
+}
+
+struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job)
+{
+	if (sched_job) {
+		struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+
+		kref_get(&job->refcount);
+	}
+
+	return sched_job;
+}
+
+void panthor_job_put(struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+
+	if (sched_job)
+		kref_put(&job->refcount, job_release);
+}
+
+struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+
+	return job->group->vm;
+}
+
+struct drm_sched_job *
+panthor_job_create(struct panthor_file *pfile,
+		   u16 group_handle,
+		   const struct drm_panthor_queue_submit *qsubmit)
+{
+	struct panthor_group_pool *gpool = pfile->groups;
+	struct panthor_job *job;
+	int ret;
+
+	if (qsubmit->pad)
+		return ERR_PTR(-EINVAL);
+
+	/* If stream_addr is zero, so stream_size should be. */
+	if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0))
+		return ERR_PTR(-EINVAL);
+
+	/* Make sure the address is aligned on 64-byte (cacheline) and the size is
+	 * aligned on 8-byte (instruction size).
+	 */
+	if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7))
+		return ERR_PTR(-EINVAL);
+
+	/* bits 24:30 must be zero. */
+	if (qsubmit->latest_flush & GENMASK(30, 24))
+		return ERR_PTR(-EINVAL);
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&job->refcount);
+	job->queue_idx = qsubmit->queue_index;
+	job->call_info.size = qsubmit->stream_size;
+	job->call_info.start = qsubmit->stream_addr;
+	job->call_info.latest_flush = qsubmit->latest_flush;
+	INIT_LIST_HEAD(&job->node);
+
+	job->group = group_get(xa_load(&gpool->xa, group_handle));
+	if (!job->group) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
+	if (job->queue_idx >= job->group->queue_count ||
+	    !job->group->queues[job->queue_idx]) {
+		ret = -EINVAL;
+		goto err_put_job;
+	}
+
+	job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL);
+	if (!job->done_fence) {
+		ret = -ENOMEM;
+		goto err_put_job;
+	}
+
+	ret = drm_sched_job_init(&job->base,
+				 &job->group->queues[job->queue_idx]->entity,
+				 1, job->group);
+	if (ret)
+		goto err_put_job;
+
+	return &job->base;
+
+err_put_job:
+	panthor_job_put(&job->base);
+	return ERR_PTR(ret);
+}
+
+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job)
+{
+	struct panthor_job *job = container_of(sched_job, struct panthor_job, base);
+
+	/* Still not sure why we want USAGE_WRITE for external objects, since I
+	 * was assuming this would be handled through explicit syncs being imported
+	 * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers
+	 * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason.
+	 */
+	panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
+				DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE);
+}
+
+void panthor_sched_unplug(struct panthor_device *ptdev)
+{
+	struct panthor_scheduler *sched = ptdev->scheduler;
+
+	cancel_delayed_work_sync(&sched->tick_work);
+
+	mutex_lock(&sched->lock);
+	if (sched->pm.has_ref) {
+		pm_runtime_put(ptdev->base.dev);
+		sched->pm.has_ref = false;
+	}
+	mutex_unlock(&sched->lock);
+}
+
+static void panthor_sched_fini(struct drm_device *ddev, void *res)
+{
+	struct panthor_scheduler *sched = res;
+	int prio;
+
+	if (!sched || !sched->csg_slot_count)
+		return;
+
+	cancel_delayed_work_sync(&sched->tick_work);
+
+	if (sched->wq)
+		destroy_workqueue(sched->wq);
+
+	if (sched->heap_alloc_wq)
+		destroy_workqueue(sched->heap_alloc_wq);
+
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio]));
+		drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio]));
+	}
+
+	drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting));
+}
+
+int panthor_sched_init(struct panthor_device *ptdev)
+{
+	struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev);
+	struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
+	struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
+	struct panthor_scheduler *sched;
+	u32 gpu_as_count, num_groups;
+	int prio, ret;
+
+	sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
+	if (!sched)
+		return -ENOMEM;
+
+	/* The highest bit in JOB_INT_* is reserved for globabl IRQs. That
+	 * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here.
+	 */
+	num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num);
+
+	/* The FW-side scheduler might deadlock if two groups with the same
+	 * priority try to access a set of resources that overlaps, with part
+	 * of the resources being allocated to one group and the other part to
+	 * the other group, both groups waiting for the remaining resources to
+	 * be allocated. To avoid that, it is recommended to assign each CSG a
+	 * different priority. In theory we could allow several groups to have
+	 * the same CSG priority if they don't request the same resources, but
+	 * that makes the scheduling logic more complicated, so let's clamp
+	 * the number of CSG slots to MAX_CSG_PRIO + 1 for now.
+	 */
+	num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups);
+
+	/* We need at least one AS for the MCU and one for the GPU contexts. */
+	gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1));
+	if (!gpu_as_count) {
+		drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
+			gpu_as_count + 1);
+		return -EINVAL;
+	}
+
+	sched->ptdev = ptdev;
+	sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features);
+	sched->csg_slot_count = num_groups;
+	sched->cs_slot_count = csg_iface->control->stream_num;
+	sched->as_slot_count = gpu_as_count;
+	ptdev->csif_info.csg_slot_count = sched->csg_slot_count;
+	ptdev->csif_info.cs_slot_count = sched->cs_slot_count;
+	ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count;
+
+	sched->last_tick = 0;
+	sched->resched_target = U64_MAX;
+	sched->tick_period = msecs_to_jiffies(10);
+	INIT_DELAYED_WORK(&sched->tick_work, tick_work);
+	INIT_WORK(&sched->sync_upd_work, sync_upd_work);
+	INIT_WORK(&sched->fw_events_work, process_fw_events_work);
+
+	ret = drmm_mutex_init(&ptdev->base, &sched->lock);
+	if (ret)
+		return ret;
+
+	for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+		INIT_LIST_HEAD(&sched->groups.runnable[prio]);
+		INIT_LIST_HEAD(&sched->groups.idle[prio]);
+	}
+	INIT_LIST_HEAD(&sched->groups.waiting);
+
+	ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&sched->reset.stopped_groups);
+
+	/* sched->heap_alloc_wq will be used for heap chunk allocation on
+	 * tiler OOM events, which means we can't use the same workqueue for
+	 * the scheduler because works queued by the scheduler are in
+	 * the dma-signalling path. Allocate a dedicated heap_alloc_wq to
+	 * work around this limitation.
+	 *
+	 * FIXME: Ultimately, what we need is a failable/non-blocking GEM
+	 * allocation path that we can call when a heap OOM is reported. The
+	 * FW is smart enough to fall back on other methods if the kernel can't
+	 * allocate memory, and fail the tiling job if none of these
+	 * countermeasures worked.
+	 *
+	 * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the
+	 * system is running out of memory.
+	 */
+	sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
+	sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
+	if (!sched->wq || !sched->heap_alloc_wq) {
+		panthor_sched_fini(&ptdev->base, sched);
+		drm_err(&ptdev->base, "Failed to allocate the workqueues");
+		return -ENOMEM;
+	}
+
+	ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched);
+	if (ret)
+		return ret;
+
+	ptdev->scheduler = sched;
+	return 0;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h
new file mode 100644
index 000000000000..66438b1f331f
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_sched.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2023 Collabora ltd. */
+
+#ifndef __PANTHOR_SCHED_H__
+#define __PANTHOR_SCHED_H__
+
+struct drm_exec;
+struct dma_fence;
+struct drm_file;
+struct drm_gem_object;
+struct drm_sched_job;
+struct drm_panthor_group_create;
+struct drm_panthor_queue_create;
+struct drm_panthor_group_get_state;
+struct drm_panthor_queue_submit;
+struct panthor_device;
+struct panthor_file;
+struct panthor_group_pool;
+struct panthor_job;
+
+int panthor_group_create(struct panthor_file *pfile,
+			 const struct drm_panthor_group_create *group_args,
+			 const struct drm_panthor_queue_create *queue_args);
+int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle);
+int panthor_group_get_state(struct panthor_file *pfile,
+			    struct drm_panthor_group_get_state *get_state);
+
+struct drm_sched_job *
+panthor_job_create(struct panthor_file *pfile,
+		   u16 group_handle,
+		   const struct drm_panthor_queue_submit *qsubmit);
+struct drm_sched_job *panthor_job_get(struct drm_sched_job *job);
+struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job);
+void panthor_job_put(struct drm_sched_job *job);
+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job);
+
+int panthor_group_pool_create(struct panthor_file *pfile);
+void panthor_group_pool_destroy(struct panthor_file *pfile);
+
+int panthor_sched_init(struct panthor_device *ptdev);
+void panthor_sched_unplug(struct panthor_device *ptdev);
+void panthor_sched_pre_reset(struct panthor_device *ptdev);
+void panthor_sched_post_reset(struct panthor_device *ptdev);
+void panthor_sched_suspend(struct panthor_device *ptdev);
+void panthor_sched_resume(struct panthor_device *ptdev);
+
+void panthor_sched_report_mmu_fault(struct panthor_device *ptdev);
+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events);
+
+#endif
--
2.42.0


From 4aec4e3e7f3a3e78a4947d8492169f64b44f0b47 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:25 +0100
Subject: [PATCH 12/69] [MERGED] drm/panthor: Add the driver frontend block

This is the last piece missing to expose the driver to the outside
world.

This is basically a wrapper between the ioctls and the other logical
blocks.

v6:
- Add Maxime's and Heiko's acks
- Return a page-aligned BO size to userspace
- Keep header inclusion alphabetically ordered

v5:
- Account for the drm_exec_init() prototype change
- Include platform_device.h

v4:
- Add an ioctl to let the UMD query the VM state
- Fix kernel doc
- Let panthor_device_init() call panthor_device_init()
- Fix cleanup ordering in the panthor_init() error path
- Add Steve's and Liviu's R-b

v3:
- Add acks for the MIT/GPL2 relicensing
- Fix 32-bit support
- Account for panthor_vm and panthor_sched changes
- Simplify the resv preparation/update logic
- Use a linked list rather than xarray for list of signals.
- Simplify panthor_get_uobj_array by returning the newly allocated
  array.
- Drop the "DOC" for job submission helpers and move the relevant
  comments to panthor_ioctl_group_submit().
- Add helpers sync_op_is_signal()/sync_op_is_wait().
- Simplify return type of panthor_submit_ctx_add_sync_signal() and
  panthor_submit_ctx_get_sync_signal().
- Drop WARN_ON from panthor_submit_ctx_add_job().
- Fix typos in comments.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Liviu Dudau <liviu.dudau@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-12-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/panthor/panthor_drv.c | 1473 +++++++++++++++++++++++++
 1 file changed, 1473 insertions(+)

diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c
new file mode 100644
index 000000000000..ff484506229f
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -0,0 +1,1473 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2018 Marty E. Plummer <hanetzer@startmail.com> */
+/* Copyright 2019 Linaro, Ltd., Rob Herring <robh@kernel.org> */
+/* Copyright 2019 Collabora ltd. */
+
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/pagemap.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_exec.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_syncobj.h>
+#include <drm/drm_utils.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_fw.h"
+#include "panthor_gem.h"
+#include "panthor_gpu.h"
+#include "panthor_heap.h"
+#include "panthor_mmu.h"
+#include "panthor_regs.h"
+#include "panthor_sched.h"
+
+/**
+ * DOC: user <-> kernel object copy helpers.
+ */
+
+/**
+ * panthor_set_uobj() - Copy kernel object to user object.
+ * @usr_ptr: Users pointer.
+ * @usr_size: Size of the user object.
+ * @min_size: Minimum size for this object.
+ * @kern_size: Size of the kernel object.
+ * @in: Address of the kernel object to copy.
+ *
+ * Helper automating kernel -> user object copies.
+ *
+ * Don't use this function directly, use PANTHOR_UOBJ_SET() instead.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in)
+{
+	/* User size shouldn't be smaller than the minimal object size. */
+	if (usr_size < min_size)
+		return -EINVAL;
+
+	if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size)))
+		return -EFAULT;
+
+	/* When the kernel object is smaller than the user object, we fill the gap with
+	 * zeros.
+	 */
+	if (usr_size > kern_size &&
+	    clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) {
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array.
+ * @in: The object array to copy.
+ * @min_stride: Minimum array stride.
+ * @obj_size: Kernel object size.
+ *
+ * Helper automating user -> kernel object copies.
+ *
+ * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead.
+ *
+ * Return: newly allocated object array or an ERR_PTR on error.
+ */
+static void *
+panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride,
+		       u32 obj_size)
+{
+	int ret = 0;
+	void *out_alloc;
+
+	/* User stride must be at least the minimum object size, otherwise it might
+	 * lack useful information.
+	 */
+	if (in->stride < min_stride)
+		return ERR_PTR(-EINVAL);
+
+	if (!in->count)
+		return NULL;
+
+	out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL);
+	if (!out_alloc)
+		return ERR_PTR(-ENOMEM);
+
+	if (obj_size == in->stride) {
+		/* Fast path when user/kernel have the same uAPI header version. */
+		if (copy_from_user(out_alloc, u64_to_user_ptr(in->array),
+				   (unsigned long)obj_size * in->count))
+			ret = -EFAULT;
+	} else {
+		void __user *in_ptr = u64_to_user_ptr(in->array);
+		void *out_ptr = out_alloc;
+
+		/* If the sizes differ, we need to copy elements one by one. */
+		for (u32 i = 0; i < in->count; i++) {
+			ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride);
+			if (ret)
+				break;
+
+			out_ptr += obj_size;
+			in_ptr += in->stride;
+		}
+	}
+
+	if (ret) {
+		kvfree(out_alloc);
+		return ERR_PTR(ret);
+	}
+
+	return out_alloc;
+}
+
+/**
+ * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size
+ * @_typename: Object type.
+ * @_last_mandatory_field: Last mandatory field.
+ *
+ * Get the minimum user object size based on the last mandatory field name,
+ * A.K.A, the name of the last field of the structure at the time this
+ * structure was added to the uAPI.
+ *
+ * Don't use directly, use PANTHOR_UOBJ_DECL() instead.
+ */
+#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \
+	(offsetof(_typename, _last_mandatory_field) + \
+	 sizeof(((_typename *)NULL)->_last_mandatory_field))
+
+/**
+ * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to
+ * evolutions.
+ * @_typename: Object type.
+ * @_last_mandatory_field: Last mandatory field.
+ *
+ * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list.
+ */
+#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \
+	_typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field)
+
+/**
+ * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object
+ * @_obj_name: Object to get the minimum size of.
+ *
+ * Don't use this macro directly, it's automatically called by
+ * PANTHOR_UOBJ_{SET,GET_ARRAY}().
+ */
+#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \
+	_Generic(_obj_name, \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \
+		 PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs))
+
+/**
+ * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
+ * @_dest_usr_ptr: User pointer to copy to.
+ * @_usr_size: Size of the user object.
+ * @_src_obj: Kernel object to copy (not a pointer).
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \
+	panthor_set_uobj(_dest_usr_ptr, _usr_size, \
+			 PANTHOR_UOBJ_MIN_SIZE(_src_obj), \
+			 sizeof(_src_obj), &(_src_obj))
+
+/**
+ * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible
+ * object array.
+ * @_dest_array: Local variable that will hold the newly allocated kernel
+ * object array.
+ * @_uobj_array: The drm_panthor_obj_array object describing the user object
+ * array.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \
+	({ \
+		typeof(_dest_array) _tmp; \
+		_tmp = panthor_get_uobj_array(_uobj_array, \
+					      PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \
+					      sizeof((_dest_array)[0])); \
+		if (!IS_ERR(_tmp)) \
+			_dest_array = _tmp; \
+		PTR_ERR_OR_ZERO(_tmp); \
+	})
+
+/**
+ * struct panthor_sync_signal - Represent a synchronization object point to attach
+ * our job fence to.
+ *
+ * This structure is here to keep track of fences that are currently bound to
+ * a specific syncobj point.
+ *
+ * At the beginning of a job submission, the fence
+ * is retrieved from the syncobj itself, and can be NULL if no fence was attached
+ * to this point.
+ *
+ * At the end, it points to the fence of the last job that had a
+ * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj.
+ *
+ * With jobs being submitted in batches, the fence might change several times during
+ * the process, allowing one job to wait on a job that's part of the same submission
+ * but appears earlier in the drm_panthor_group_submit::queue_submits array.
+ */
+struct panthor_sync_signal {
+	/** @node: list_head to track signal ops within a submit operation */
+	struct list_head node;
+
+	/** @handle: The syncobj handle. */
+	u32 handle;
+
+	/**
+	 * @point: The syncobj point.
+	 *
+	 * Zero for regular syncobjs, and non-zero for timeline syncobjs.
+	 */
+	u64 point;
+
+	/**
+	 * @syncobj: The sync object pointed by @handle.
+	 */
+	struct drm_syncobj *syncobj;
+
+	/**
+	 * @chain: Chain object used to link the new fence to an existing
+	 * timeline syncobj.
+	 *
+	 * NULL for regular syncobj, non-NULL for timeline syncobjs.
+	 */
+	struct dma_fence_chain *chain;
+
+	/**
+	 * @fence: The fence to assign to the syncobj or syncobj-point.
+	 */
+	struct dma_fence *fence;
+};
+
+/**
+ * struct panthor_job_ctx - Job context
+ */
+struct panthor_job_ctx {
+	/** @job: The job that is about to be submitted to drm_sched. */
+	struct drm_sched_job *job;
+
+	/** @syncops: Array of sync operations. */
+	struct drm_panthor_sync_op *syncops;
+
+	/** @syncop_count: Number of sync operations. */
+	u32 syncop_count;
+};
+
+/**
+ * struct panthor_submit_ctx - Submission context
+ *
+ * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or
+ * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the
+ * initialization and cleanup steps.
+ */
+struct panthor_submit_ctx {
+	/** @file: DRM file this submission happens on. */
+	struct drm_file *file;
+
+	/**
+	 * @signals: List of struct panthor_sync_signal.
+	 *
+	 * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here,
+	 * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry
+	 * matching the syncobj+point exists before calling
+	 * drm_syncobj_find_fence(). This allows us to describe dependencies
+	 * existing between jobs that are part of the same batch.
+	 */
+	struct list_head signals;
+
+	/** @jobs: Array of jobs. */
+	struct panthor_job_ctx *jobs;
+
+	/** @job_count: Number of entries in the @jobs array. */
+	u32 job_count;
+
+	/** @exec: drm_exec context used to acquire and prepare resv objects. */
+	struct drm_exec exec;
+};
+
+#define PANTHOR_SYNC_OP_FLAGS_MASK \
+	(DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL)
+
+static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op)
+{
+	return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL);
+}
+
+static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op)
+{
+	/* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */
+	return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL);
+}
+
+/**
+ * panthor_check_sync_op() - Check drm_panthor_sync_op fields
+ * @sync_op: The sync operation to check.
+ *
+ * Return: 0 on success, -EINVAL otherwise.
+ */
+static int
+panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op)
+{
+	u8 handle_type;
+
+	if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK)
+		return -EINVAL;
+
+	handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK;
+	if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ &&
+	    handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ)
+		return -EINVAL;
+
+	if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ &&
+	    sync_op->timeline_value != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object
+ * @sig_sync: Signal object to free.
+ */
+static void
+panthor_sync_signal_free(struct panthor_sync_signal *sig_sync)
+{
+	if (!sig_sync)
+		return;
+
+	drm_syncobj_put(sig_sync->syncobj);
+	dma_fence_chain_free(sig_sync->chain);
+	dma_fence_put(sig_sync->fence);
+	kfree(sig_sync);
+}
+
+/**
+ * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context
+ * @ctx: Context to add the signal operation to.
+ * @handle: Syncobj handle.
+ * @point: Syncobj point.
+ *
+ * Return: 0 on success, otherwise negative error value.
+ */
+static int
+panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point)
+{
+	struct panthor_sync_signal *sig_sync;
+	struct dma_fence *cur_fence;
+	int ret;
+
+	sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL);
+	if (!sig_sync)
+		return -ENOMEM;
+
+	sig_sync->handle = handle;
+	sig_sync->point = point;
+
+	if (point > 0) {
+		sig_sync->chain = dma_fence_chain_alloc();
+		if (!sig_sync->chain) {
+			ret = -ENOMEM;
+			goto err_free_sig_sync;
+		}
+	}
+
+	sig_sync->syncobj = drm_syncobj_find(ctx->file, handle);
+	if (!sig_sync->syncobj) {
+		ret = -EINVAL;
+		goto err_free_sig_sync;
+	}
+
+	/* Retrieve the current fence attached to that point. It's
+	 * perfectly fine to get a NULL fence here, it just means there's
+	 * no fence attached to that point yet.
+	 */
+	if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence))
+		sig_sync->fence = cur_fence;
+
+	list_add_tail(&sig_sync->node, &ctx->signals);
+
+	return 0;
+
+err_free_sig_sync:
+	panthor_sync_signal_free(sig_sync);
+	return ret;
+}
+
+/**
+ * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a
+ * submit context.
+ * @ctx: Context to search the signal operation in.
+ * @handle: Syncobj handle.
+ * @point: Syncobj point.
+ *
+ * Return: A valid panthor_sync_signal object if found, NULL otherwise.
+ */
+static struct panthor_sync_signal *
+panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point)
+{
+	struct panthor_sync_signal *sig_sync;
+
+	list_for_each_entry(sig_sync, &ctx->signals, node) {
+		if (handle == sig_sync->handle && point == sig_sync->point)
+			return sig_sync;
+	}
+
+	return NULL;
+}
+
+/**
+ * panthor_submit_ctx_add_job() - Add a job to a submit context
+ * @ctx: Context to search the signal operation in.
+ * @idx: Index of the job in the context.
+ * @job: Job to add.
+ * @syncs: Sync operations provided by userspace.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx,
+			   struct drm_sched_job *job,
+			   const struct drm_panthor_obj_array *syncs)
+{
+	int ret;
+
+	ctx->jobs[idx].job = job;
+
+	ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs);
+	if (ret)
+		return ret;
+
+	ctx->jobs[idx].syncop_count = syncs->count;
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found.
+ * @ctx: Context to search the signal operation in.
+ * @handle: Syncobj handle.
+ * @point: Syncobj point.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point)
+{
+	struct panthor_sync_signal *sig_sync;
+
+	sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point);
+	if (sig_sync)
+		return 0;
+
+	return panthor_submit_ctx_add_sync_signal(ctx, handle, point);
+}
+
+/**
+ * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences
+ * on the signal operations specified by a job.
+ * @ctx: Context to search the signal operation in.
+ * @job_idx: Index of the job to operate on.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx,
+						 u32 job_idx)
+{
+	struct panthor_device *ptdev = container_of(ctx->file->minor->dev,
+						    struct panthor_device,
+						    base);
+	struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished;
+	const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops;
+	u32 sync_op_count = ctx->jobs[job_idx].syncop_count;
+
+	for (u32 i = 0; i < sync_op_count; i++) {
+		struct dma_fence *old_fence;
+		struct panthor_sync_signal *sig_sync;
+
+		if (!sync_op_is_signal(&sync_ops[i]))
+			continue;
+
+		sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle,
+								 sync_ops[i].timeline_value);
+		if (drm_WARN_ON(&ptdev->base, !sig_sync))
+			return -EINVAL;
+
+		old_fence = sig_sync->fence;
+		sig_sync->fence = dma_fence_get(done_fence);
+		dma_fence_put(old_fence);
+
+		if (drm_WARN_ON(&ptdev->base, !sig_sync->fence))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations
+ * and add them to the context.
+ * @ctx: Context to search the signal operation in.
+ * @job_idx: Index of the job to operate on.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx,
+					  u32 job_idx)
+{
+	const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops;
+	u32 sync_op_count = ctx->jobs[job_idx].syncop_count;
+
+	for (u32 i = 0; i < sync_op_count; i++) {
+		int ret;
+
+		if (!sync_op_is_signal(&sync_ops[i]))
+			continue;
+
+		ret = panthor_check_sync_op(&sync_ops[i]);
+		if (ret)
+			return ret;
+
+		ret = panthor_submit_ctx_get_sync_signal(ctx,
+							 sync_ops[i].handle,
+							 sync_ops[i].timeline_value);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push
+ * the currently assigned fence to the associated syncobj.
+ * @ctx: Context to push fences on.
+ *
+ * This is the last step of a submission procedure, and is done once we know the submission
+ * is effective and job fences are guaranteed to be signaled in finite time.
+ */
+static void
+panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx)
+{
+	struct panthor_sync_signal *sig_sync;
+
+	list_for_each_entry(sig_sync, &ctx->signals, node) {
+		if (sig_sync->chain) {
+			drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain,
+					      sig_sync->fence, sig_sync->point);
+			sig_sync->chain = NULL;
+		} else {
+			drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence);
+		}
+	}
+}
+
+/**
+ * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as
+ * job dependencies.
+ * @ctx: Submit context.
+ * @job_idx: Index of the job to operate on.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx,
+					u32 job_idx)
+{
+	struct panthor_device *ptdev = container_of(ctx->file->minor->dev,
+						    struct panthor_device,
+						    base);
+	const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops;
+	struct drm_sched_job *job = ctx->jobs[job_idx].job;
+	u32 sync_op_count = ctx->jobs[job_idx].syncop_count;
+	int ret = 0;
+
+	for (u32 i = 0; i < sync_op_count; i++) {
+		struct panthor_sync_signal *sig_sync;
+		struct dma_fence *fence;
+
+		if (!sync_op_is_wait(&sync_ops[i]))
+			continue;
+
+		ret = panthor_check_sync_op(&sync_ops[i]);
+		if (ret)
+			return ret;
+
+		sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle,
+								 sync_ops[i].timeline_value);
+		if (sig_sync) {
+			if (drm_WARN_ON(&ptdev->base, !sig_sync->fence))
+				return -EINVAL;
+
+			fence = dma_fence_get(sig_sync->fence);
+		} else {
+			ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle,
+						     sync_ops[i].timeline_value,
+						     0, &fence);
+			if (ret)
+				return ret;
+		}
+
+		ret = drm_sched_job_add_dependency(job, fence);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations
+ * and add them to the submit context.
+ * @ctx: Submit context.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx)
+{
+	for (u32 i = 0; i < ctx->job_count; i++) {
+		int ret;
+
+		ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs
+ * @ctx: Submit context.
+ *
+ * Must be called after the resv preparation has been taken care of.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int
+panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx)
+{
+	for (u32 i = 0; i < ctx->job_count; i++) {
+		int ret;
+
+		ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i);
+		if (ret)
+			return ret;
+
+		drm_sched_job_arm(ctx->jobs[i].job);
+
+		ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities.
+ * @ctx: Submit context.
+ * @upd_resvs: Callback used to update reservation objects that were previously
+ * preapred.
+ */
+static void
+panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx,
+			     void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *))
+{
+	for (u32 i = 0; i < ctx->job_count; i++) {
+		upd_resvs(&ctx->exec, ctx->jobs[i].job);
+		drm_sched_entity_push_job(ctx->jobs[i].job);
+
+		/* Job is owned by the scheduler now. */
+		ctx->jobs[i].job = NULL;
+	}
+
+	panthor_submit_ctx_push_fences(ctx);
+}
+
+/**
+ * panthor_submit_ctx_init() - Initializes a submission context
+ * @ctx: Submit context to initialize.
+ * @file: drm_file this submission happens on.
+ * @job_count: Number of jobs that will be submitted.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx,
+				   struct drm_file *file, u32 job_count)
+{
+	ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs),
+				   GFP_KERNEL | __GFP_ZERO);
+	if (!ctx->jobs)
+		return -ENOMEM;
+
+	ctx->file = file;
+	ctx->job_count = job_count;
+	INIT_LIST_HEAD(&ctx->signals);
+	drm_exec_init(&ctx->exec,
+		      DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES,
+		      0);
+	return 0;
+}
+
+/**
+ * panthor_submit_ctx_cleanup() - Cleanup a submission context
+ * @ctx: Submit context to cleanup.
+ * @job_put: Job put callback.
+ */
+static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx,
+				       void (*job_put)(struct drm_sched_job *))
+{
+	struct panthor_sync_signal *sig_sync, *tmp;
+	unsigned long i;
+
+	drm_exec_fini(&ctx->exec);
+
+	list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node)
+		panthor_sync_signal_free(sig_sync);
+
+	for (i = 0; i < ctx->job_count; i++) {
+		job_put(ctx->jobs[i].job);
+		kvfree(ctx->jobs[i].syncops);
+	}
+
+	kvfree(ctx->jobs);
+}
+
+static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+	struct drm_panthor_dev_query *args = data;
+
+	if (!args->pointer) {
+		switch (args->type) {
+		case DRM_PANTHOR_DEV_QUERY_GPU_INFO:
+			args->size = sizeof(ptdev->gpu_info);
+			return 0;
+
+		case DRM_PANTHOR_DEV_QUERY_CSIF_INFO:
+			args->size = sizeof(ptdev->csif_info);
+			return 0;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	switch (args->type) {
+	case DRM_PANTHOR_DEV_QUERY_GPU_INFO:
+		return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info);
+
+	case DRM_PANTHOR_DEV_QUERY_CSIF_INFO:
+		return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info);
+
+	default:
+		return -EINVAL;
+	}
+}
+
+#define PANTHOR_VM_CREATE_FLAGS			0
+
+static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data,
+				   struct drm_file *file)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_vm_create *args = data;
+	int cookie, ret;
+
+	if (!drm_dev_enter(ddev, &cookie))
+		return -ENODEV;
+
+	ret = panthor_vm_pool_create_vm(ptdev, pfile->vms,  args);
+	if (ret >= 0) {
+		args->id = ret;
+		ret = 0;
+	}
+
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data,
+				    struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_vm_destroy *args = data;
+
+	if (args->pad)
+		return -EINVAL;
+
+	return panthor_vm_pool_destroy_vm(pfile->vms, args->id);
+}
+
+#define PANTHOR_BO_FLAGS		DRM_PANTHOR_BO_NO_MMAP
+
+static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
+				   struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_bo_create *args = data;
+	struct panthor_vm *vm = NULL;
+	int cookie, ret;
+
+	if (!drm_dev_enter(ddev, &cookie))
+		return -ENODEV;
+
+	if (!args->size || args->pad ||
+	    (args->flags & ~PANTHOR_BO_FLAGS)) {
+		ret = -EINVAL;
+		goto out_dev_exit;
+	}
+
+	if (args->exclusive_vm_id) {
+		vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
+		if (!vm) {
+			ret = -EINVAL;
+			goto out_dev_exit;
+		}
+	}
+
+	ret = panthor_gem_create_with_handle(file, ddev, vm, &args->size,
+					     args->flags, &args->handle);
+
+	panthor_vm_put(vm);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data,
+					struct drm_file *file)
+{
+	struct drm_panthor_bo_mmap_offset *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	if (args->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret)
+		goto out;
+
+	args->offset = drm_vma_node_offset_addr(&obj->vma_node);
+
+out:
+	drm_gem_object_put(obj);
+	return ret;
+}
+
+static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data,
+				      struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_group_submit *args = data;
+	struct drm_panthor_queue_submit *jobs_args;
+	struct panthor_submit_ctx ctx;
+	int ret = 0, cookie;
+
+	if (args->pad)
+		return -EINVAL;
+
+	if (!drm_dev_enter(ddev, &cookie))
+		return -ENODEV;
+
+	ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits);
+	if (ret)
+		goto out_dev_exit;
+
+	ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count);
+	if (ret)
+		goto out_free_jobs_args;
+
+	/* Create jobs and attach sync operations */
+	for (u32 i = 0; i < args->queue_submits.count; i++) {
+		const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i];
+		struct drm_sched_job *job;
+
+		job = panthor_job_create(pfile, args->group_handle, qsubmit);
+		if (IS_ERR(job)) {
+			ret = PTR_ERR(job);
+			goto out_cleanup_submit_ctx;
+		}
+
+		ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs);
+		if (ret)
+			goto out_cleanup_submit_ctx;
+	}
+
+	/*
+	 * Collect signal operations on all jobs, such that each job can pick
+	 * from it for its dependencies and update the fence to signal when the
+	 * job is submitted.
+	 */
+	ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/*
+	 * We acquire/prepare revs on all jobs before proceeding with the
+	 * dependency registration.
+	 *
+	 * This is solving two problems:
+	 * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be
+	 *    protected by a lock to make sure no concurrent access to the same
+	 *    entity get interleaved, which would mess up with the fence seqno
+	 *    ordering. Luckily, one of the resv being acquired is the VM resv,
+	 *    and a scheduling entity is only bound to a single VM. As soon as
+	 *    we acquire the VM resv, we should be safe.
+	 * 2. Jobs might depend on fences that were issued by previous jobs in
+	 *    the same batch, so we can't add dependencies on all jobs before
+	 *    arming previous jobs and registering the fence to the signal
+	 *    array, otherwise we might miss dependencies, or point to an
+	 *    outdated fence.
+	 */
+	if (args->queue_submits.count > 0) {
+		/* All jobs target the same group, so they also point to the same VM. */
+		struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job);
+
+		drm_exec_until_all_locked(&ctx.exec) {
+			ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm,
+								  args->queue_submits.count);
+		}
+
+		if (ret)
+			goto out_cleanup_submit_ctx;
+	}
+
+	/*
+	 * Now that resvs are locked/prepared, we can iterate over each job to
+	 * add the dependencies, arm the job fence, register the job fence to
+	 * the signal array.
+	 */
+	ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Nothing can fail after that point, so we can make our job fences
+	 * visible to the outside world. Push jobs and set the job fences to
+	 * the resv slots we reserved.  This also pushes the fences to the
+	 * syncobjs that are part of the signal array.
+	 */
+	panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs);
+
+out_cleanup_submit_ctx:
+	panthor_submit_ctx_cleanup(&ctx, panthor_job_put);
+
+out_free_jobs_args:
+	kvfree(jobs_args);
+
+out_dev_exit:
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data,
+				       struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_group_destroy *args = data;
+
+	if (args->pad)
+		return -EINVAL;
+
+	return panthor_group_destroy(pfile, args->group_handle);
+}
+
+static int panthor_ioctl_group_create(struct drm_device *ddev, void *data,
+				      struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_group_create *args = data;
+	struct drm_panthor_queue_create *queue_args;
+	int ret;
+
+	if (!args->queues.count)
+		return -EINVAL;
+
+	ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues);
+	if (ret)
+		return ret;
+
+	ret = panthor_group_create(pfile, args, queue_args);
+	if (ret >= 0) {
+		args->group_handle = ret;
+		ret = 0;
+	}
+
+	kvfree(queue_args);
+	return ret;
+}
+
+static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data,
+					 struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_group_get_state *args = data;
+
+	return panthor_group_get_state(pfile, args);
+}
+
+static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data,
+					   struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_tiler_heap_create *args = data;
+	struct panthor_heap_pool *pool;
+	struct panthor_vm *vm;
+	int ret;
+
+	vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id);
+	if (!vm)
+		return -EINVAL;
+
+	pool = panthor_vm_get_heap_pool(vm, true);
+	if (IS_ERR(pool)) {
+		ret = PTR_ERR(pool);
+		goto out_put_vm;
+	}
+
+	ret = panthor_heap_create(pool,
+				  args->initial_chunk_count,
+				  args->chunk_size,
+				  args->max_chunks,
+				  args->target_in_flight,
+				  &args->tiler_heap_ctx_gpu_va,
+				  &args->first_heap_chunk_gpu_va);
+	if (ret < 0)
+		goto out_put_heap_pool;
+
+	/* Heap pools are per-VM. We combine the VM and HEAP id to make
+	 * a unique heap handle.
+	 */
+	args->handle = (args->vm_id << 16) | ret;
+	ret = 0;
+
+out_put_heap_pool:
+	panthor_heap_pool_put(pool);
+
+out_put_vm:
+	panthor_vm_put(vm);
+	return ret;
+}
+
+static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data,
+					    struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_tiler_heap_destroy *args = data;
+	struct panthor_heap_pool *pool;
+	struct panthor_vm *vm;
+	int ret;
+
+	if (args->pad)
+		return -EINVAL;
+
+	vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16);
+	if (!vm)
+		return -EINVAL;
+
+	pool = panthor_vm_get_heap_pool(vm, false);
+	if (!pool) {
+		ret = -EINVAL;
+		goto out_put_vm;
+	}
+
+	ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0));
+	panthor_heap_pool_put(pool);
+
+out_put_vm:
+	panthor_vm_put(vm);
+	return ret;
+}
+
+static int panthor_ioctl_vm_bind_async(struct drm_device *ddev,
+				       struct drm_panthor_vm_bind *args,
+				       struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_vm_bind_op *jobs_args;
+	struct panthor_submit_ctx ctx;
+	struct panthor_vm *vm;
+	int ret = 0;
+
+	vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id);
+	if (!vm)
+		return -EINVAL;
+
+	ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops);
+	if (ret)
+		goto out_put_vm;
+
+	ret = panthor_submit_ctx_init(&ctx, file, args->ops.count);
+	if (ret)
+		goto out_free_jobs_args;
+
+	for (u32 i = 0; i < args->ops.count; i++) {
+		struct drm_panthor_vm_bind_op *op = &jobs_args[i];
+		struct drm_sched_job *job;
+
+		job = panthor_vm_bind_job_create(file, vm, op);
+		if (IS_ERR(job)) {
+			ret = PTR_ERR(job);
+			goto out_cleanup_submit_ctx;
+		}
+
+		ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs);
+		if (ret)
+			goto out_cleanup_submit_ctx;
+	}
+
+	ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Prepare reservation objects for each VM_BIND job. */
+	drm_exec_until_all_locked(&ctx.exec) {
+		for (u32 i = 0; i < ctx.job_count; i++) {
+			ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job);
+			drm_exec_retry_on_contention(&ctx.exec);
+			if (ret)
+				goto out_cleanup_submit_ctx;
+		}
+	}
+
+	ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx);
+	if (ret)
+		goto out_cleanup_submit_ctx;
+
+	/* Nothing can fail after that point. */
+	panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs);
+
+out_cleanup_submit_ctx:
+	panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put);
+
+out_free_jobs_args:
+	kvfree(jobs_args);
+
+out_put_vm:
+	panthor_vm_put(vm);
+	return ret;
+}
+
+static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev,
+				      struct drm_panthor_vm_bind *args,
+				      struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_vm_bind_op *jobs_args;
+	struct panthor_vm *vm;
+	int ret;
+
+	vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id);
+	if (!vm)
+		return -EINVAL;
+
+	ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops);
+	if (ret)
+		goto out_put_vm;
+
+	for (u32 i = 0; i < args->ops.count; i++) {
+		ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]);
+		if (ret) {
+			/* Update ops.count so the user knows where things failed. */
+			args->ops.count = i;
+			break;
+		}
+	}
+
+	kvfree(jobs_args);
+
+out_put_vm:
+	panthor_vm_put(vm);
+	return ret;
+}
+
+#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC
+
+static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data,
+				 struct drm_file *file)
+{
+	struct drm_panthor_vm_bind *args = data;
+	int cookie, ret;
+
+	if (!drm_dev_enter(ddev, &cookie))
+		return -ENODEV;
+
+	if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC)
+		ret = panthor_ioctl_vm_bind_async(ddev, args, file);
+	else
+		ret = panthor_ioctl_vm_bind_sync(ddev, args, file);
+
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data,
+				      struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+	struct drm_panthor_vm_get_state *args = data;
+	struct panthor_vm *vm;
+
+	vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id);
+	if (!vm)
+		return -EINVAL;
+
+	if (panthor_vm_is_unusable(vm))
+		args->state = DRM_PANTHOR_VM_STATE_UNUSABLE;
+	else
+		args->state = DRM_PANTHOR_VM_STATE_USABLE;
+
+	panthor_vm_put(vm);
+	return 0;
+}
+
+static int
+panthor_open(struct drm_device *ddev, struct drm_file *file)
+{
+	struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
+	struct panthor_file *pfile;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -EINVAL;
+
+	pfile = kzalloc(sizeof(*pfile), GFP_KERNEL);
+	if (!pfile) {
+		ret = -ENOMEM;
+		goto err_put_mod;
+	}
+
+	pfile->ptdev = ptdev;
+
+	ret = panthor_vm_pool_create(pfile);
+	if (ret)
+		goto err_free_file;
+
+	ret = panthor_group_pool_create(pfile);
+	if (ret)
+		goto err_destroy_vm_pool;
+
+	file->driver_priv = pfile;
+	return 0;
+
+err_destroy_vm_pool:
+	panthor_vm_pool_destroy(pfile);
+
+err_free_file:
+	kfree(pfile);
+
+err_put_mod:
+	module_put(THIS_MODULE);
+	return ret;
+}
+
+static void
+panthor_postclose(struct drm_device *ddev, struct drm_file *file)
+{
+	struct panthor_file *pfile = file->driver_priv;
+
+	panthor_group_pool_destroy(pfile);
+	panthor_vm_pool_destroy(pfile);
+
+	kfree(pfile);
+	module_put(THIS_MODULE);
+}
+
+static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = {
+#define PANTHOR_IOCTL(n, func, flags) \
+	DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags)
+
+	PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW),
+	PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW),
+};
+
+static int panthor_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file = filp->private_data;
+	struct panthor_file *pfile = file->driver_priv;
+	struct panthor_device *ptdev = pfile->ptdev;
+	u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT;
+	int ret, cookie;
+
+	if (!drm_dev_enter(file->minor->dev, &cookie))
+		return -ENODEV;
+
+	if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET)
+		ret = panthor_device_mmap_io(ptdev, vma);
+	else
+		ret = drm_gem_mmap(filp, vma);
+
+	drm_dev_exit(cookie);
+	return ret;
+}
+
+static const struct file_operations panthor_drm_driver_fops = {
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.compat_ioctl = drm_compat_ioctl,
+	.poll = drm_poll,
+	.read = drm_read,
+	.llseek = noop_llseek,
+	.mmap = panthor_mmap,
+};
+
+#ifdef CONFIG_DEBUG_FS
+static void panthor_debugfs_init(struct drm_minor *minor)
+{
+	panthor_mmu_debugfs_init(minor);
+}
+#endif
+
+/*
+ * PanCSF driver version:
+ * - 1.0 - initial interface
+ */
+static const struct drm_driver panthor_drm_driver = {
+	.driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ |
+			   DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA,
+	.open = panthor_open,
+	.postclose = panthor_postclose,
+	.ioctls = panthor_drm_driver_ioctls,
+	.num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls),
+	.fops = &panthor_drm_driver_fops,
+	.name = "panthor",
+	.desc = "Panthor DRM driver",
+	.date = "20230801",
+	.major = 1,
+	.minor = 0,
+
+	.gem_create_object = panthor_gem_create_object,
+	.gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
+#ifdef CONFIG_DEBUG_FS
+	.debugfs_init = panthor_debugfs_init,
+#endif
+};
+
+static int panthor_probe(struct platform_device *pdev)
+{
+	struct panthor_device *ptdev;
+
+	ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver,
+				   struct panthor_device, base);
+	if (!ptdev)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, ptdev);
+
+	return panthor_device_init(ptdev);
+}
+
+static void panthor_remove(struct platform_device *pdev)
+{
+	struct panthor_device *ptdev = platform_get_drvdata(pdev);
+
+	panthor_device_unplug(ptdev);
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "rockchip,rk3588-mali" },
+	{ .compatible = "arm,mali-valhall-csf" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, dt_match);
+
+static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops,
+				 panthor_device_suspend,
+				 panthor_device_resume,
+				 NULL);
+
+static struct platform_driver panthor_driver = {
+	.probe = panthor_probe,
+	.remove_new = panthor_remove,
+	.driver = {
+		.name = "panthor",
+		.pm = &panthor_pm_ops,
+		.of_match_table = dt_match,
+	},
+};
+
+/*
+ * Workqueue used to cleanup stuff.
+ *
+ * We create a dedicated workqueue so we can drain on unplug and
+ * make sure all resources are freed before the module is unloaded.
+ */
+struct workqueue_struct *panthor_cleanup_wq;
+
+static int __init panthor_init(void)
+{
+	int ret;
+
+	ret = panthor_mmu_pt_cache_init();
+	if (ret)
+		return ret;
+
+	panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0);
+	if (!panthor_cleanup_wq) {
+		pr_err("panthor: Failed to allocate the workqueues");
+		ret = -ENOMEM;
+		goto err_mmu_pt_cache_fini;
+	}
+
+	ret = platform_driver_register(&panthor_driver);
+	if (ret)
+		goto err_destroy_cleanup_wq;
+
+	return 0;
+
+err_destroy_cleanup_wq:
+	destroy_workqueue(panthor_cleanup_wq);
+
+err_mmu_pt_cache_fini:
+	panthor_mmu_pt_cache_fini();
+	return ret;
+}
+module_init(panthor_init);
+
+static void __exit panthor_exit(void)
+{
+	platform_driver_unregister(&panthor_driver);
+	destroy_workqueue(panthor_cleanup_wq);
+	panthor_mmu_pt_cache_fini();
+}
+module_exit(panthor_exit);
+
+MODULE_AUTHOR("Panthor Project Developers");
+MODULE_DESCRIPTION("Panthor DRM Driver");
+MODULE_LICENSE("Dual MIT/GPL");
--
2.42.0


From 440259f31cc0eafd030168002b8824d6226de14c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:26 +0100
Subject: [PATCH 13/69] [MERGED] drm/panthor: Allow driver compilation

Now that all blocks are available, we can add/update Kconfig/Makefile
files to allow compilation.

v6:
- Add Maxime's and Heiko's acks
- Keep source files alphabetically ordered in the Makefile

v4:
- Add Steve's R-b

v3:
- Add a dep on DRM_GPUVM
- Fix dependencies in Kconfig
- Expand help text to (hopefully) describe which GPUs are to be
  supported by this driver and which are for panfrost.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Acked-by: Steven Price <steven.price@arm.com> # MIT+GPL2 relicensing,Arm
Acked-by: Grant Likely <grant.likely@linaro.org> # MIT+GPL2 relicensing,Linaro
Acked-by: Boris Brezillon <boris.brezillon@collabora.com> # MIT+GPL2 relicensing,Collabora
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-13-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/gpu/drm/Kconfig          |  2 ++
 drivers/gpu/drm/Makefile         |  1 +
 drivers/gpu/drm/panthor/Kconfig  | 23 +++++++++++++++++++++++
 drivers/gpu/drm/panthor/Makefile | 14 ++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 5a0c476361c3..fb65d3ae1daf 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -371,6 +371,8 @@ source "drivers/gpu/drm/lima/Kconfig"

 source "drivers/gpu/drm/panfrost/Kconfig"

+source "drivers/gpu/drm/panthor/Kconfig"
+
 source "drivers/gpu/drm/aspeed/Kconfig"

 source "drivers/gpu/drm/mcde/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 104b42df2e95..6eb2b553a163 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -179,6 +179,7 @@ obj-$(CONFIG_DRM_XEN) += xen/
 obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
 obj-$(CONFIG_DRM_LIMA)  += lima/
 obj-$(CONFIG_DRM_PANFROST) += panfrost/
+obj-$(CONFIG_DRM_PANTHOR) += panthor/
 obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
 obj-$(CONFIG_DRM_MCDE) += mcde/
 obj-$(CONFIG_DRM_TIDSS) += tidss/
diff --git a/drivers/gpu/drm/panthor/Kconfig b/drivers/gpu/drm/panthor/Kconfig
new file mode 100644
index 000000000000..55b40ad07f3b
--- /dev/null
+++ b/drivers/gpu/drm/panthor/Kconfig
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+config DRM_PANTHOR
+	tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)"
+	depends on DRM
+	depends on ARM || ARM64 || COMPILE_TEST
+	depends on !GENERIC_ATOMIC64  # for IOMMU_IO_PGTABLE_LPAE
+	depends on MMU
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DRM_EXEC
+	select DRM_GEM_SHMEM_HELPER
+	select DRM_GPUVM
+	select DRM_SCHED
+	select IOMMU_IO_PGTABLE_LPAE
+	select IOMMU_SUPPORT
+	select PM_DEVFREQ
+	help
+	  DRM driver for ARM Mali CSF-based GPUs.
+
+	  This driver is for Mali (or Immortalis) Valhall Gxxx GPUs.
+
+	  Note that the Mali-G68 and Mali-G78, while Valhall architecture, will
+	  be supported with the panfrost driver as they are not CSF GPUs.
diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile
new file mode 100644
index 000000000000..15294719b09c
--- /dev/null
+++ b/drivers/gpu/drm/panthor/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0 or MIT
+
+panthor-y := \
+	panthor_devfreq.o \
+	panthor_device.o \
+	panthor_drv.o \
+	panthor_fw.o \
+	panthor_gem.o \
+	panthor_gpu.o \
+	panthor_heap.o \
+	panthor_mmu.o \
+	panthor_sched.o
+
+obj-$(CONFIG_DRM_PANTHOR) += panthor.o
--
2.42.0


From 4455124271e87185da9ef0ab19843e7e4a9230e7 Mon Sep 17 00:00:00 2001
From: Liviu Dudau <liviu.dudau@arm.com>
Date: Thu, 29 Feb 2024 17:22:27 +0100
Subject: [PATCH 14/69] [MERGED] dt-bindings: gpu: mali-valhall-csf: Add
 support for Arm Mali CSF GPUs

Arm has introduced a new v10 GPU architecture that replaces the Job Manager
interface with a new Command Stream Frontend. It adds firmware driven
command stream queues that can be used by kernel and user space to submit
jobs to the GPU.

Add the initial schema for the device tree that is based on support for
RK3588 SoC. The minimum number of clocks is one for the IP, but on Rockchip
platforms they will tend to expose the semi-independent clocks for better
power management.

v6:
- Add Maxime's and Heiko's acks

v5:
- Move the opp-table node under the gpu node

v4:
- Fix formatting issue

v3:
- Cleanup commit message to remove redundant text
- Added opp-table property and re-ordered entries
- Clarified power-domains and power-domain-names requirements for RK3588.
- Cleaned up example

Note: power-domains and power-domain-names requirements for other platforms
are still work in progress, hence the bindings are left incomplete here.

v2:
- New commit

Signed-off-by: Liviu Dudau <liviu.dudau@arm.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Conor Dooley <conor+dt@kernel.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-14-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 .../bindings/gpu/arm,mali-valhall-csf.yaml    | 147 ++++++++++++++++++
 1 file changed, 147 insertions(+)

diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml
new file mode 100644
index 000000000000..a5b4e0021758
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpu/arm,mali-valhall-csf.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ARM Mali Valhall GPU
+
+maintainers:
+  - Liviu Dudau <liviu.dudau@arm.com>
+  - Boris Brezillon <boris.brezillon@collabora.com>
+
+properties:
+  $nodename:
+    pattern: '^gpu@[a-f0-9]+$'
+
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - rockchip,rk3588-mali
+          - const: arm,mali-valhall-csf   # Mali Valhall GPU model/revision is fully discoverable
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: Job interrupt
+      - description: MMU interrupt
+      - description: GPU interrupt
+
+  interrupt-names:
+    items:
+      - const: job
+      - const: mmu
+      - const: gpu
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    minItems: 1
+    items:
+      - const: core
+      - const: coregroup
+      - const: stacks
+
+  mali-supply: true
+
+  operating-points-v2: true
+  opp-table:
+    type: object
+
+  power-domains:
+    minItems: 1
+    maxItems: 5
+
+  power-domain-names:
+    minItems: 1
+    maxItems: 5
+
+  sram-supply: true
+
+  "#cooling-cells":
+    const: 2
+
+  dynamic-power-coefficient:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      A u32 value that represents the running time dynamic
+      power coefficient in units of uW/MHz/V^2. The
+      coefficient can either be calculated from power
+      measurements or derived by analysis.
+
+      The dynamic power consumption of the GPU is
+      proportional to the square of the Voltage (V) and
+      the clock frequency (f). The coefficient is used to
+      calculate the dynamic power as below -
+
+      Pdyn = dynamic-power-coefficient * V^2 * f
+
+      where voltage is in V, frequency is in MHz.
+
+  dma-coherent: true
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - mali-supply
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: rockchip,rk3588-mali
+    then:
+      properties:
+        clocks:
+          minItems: 3
+        power-domains:
+          maxItems: 1
+        power-domain-names: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/power/rk3588-power.h>
+
+    gpu: gpu@fb000000 {
+        compatible = "rockchip,rk3588-mali", "arm,mali-valhall-csf";
+        reg = <0xfb000000 0x200000>;
+        interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH 0>,
+                     <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH 0>,
+                     <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH 0>;
+        interrupt-names = "job", "mmu", "gpu";
+        clock-names = "core", "coregroup", "stacks";
+        clocks = <&cru CLK_GPU>, <&cru CLK_GPU_COREGROUP>,
+                 <&cru CLK_GPU_STACKS>;
+        power-domains = <&power RK3588_PD_GPU>;
+        operating-points-v2 = <&gpu_opp_table>;
+        mali-supply = <&vdd_gpu_s0>;
+        sram-supply = <&vdd_gpu_mem_s0>;
+
+        gpu_opp_table: opp-table {
+            compatible = "operating-points-v2";
+            opp-300000000 {
+                opp-hz = /bits/ 64 <300000000>;
+                opp-microvolt = <675000 675000 850000>;
+            };
+            opp-400000000 {
+                opp-hz = /bits/ 64 <400000000>;
+                opp-microvolt = <675000 675000 850000>;
+            };
+        };
+    };
+
+...
--
2.42.0


From 27dfd585be675edc5b80449a16304ac3dfc2ea57 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Thu, 29 Feb 2024 17:22:28 +0100
Subject: [PATCH 15/69] [MERGED] drm/panthor: Add an entry to MAINTAINERS

Add an entry for the Panthor driver to the MAINTAINERS file.

v6:
- Add Maxime's and Heiko's acks

v4:
- Add Steve's R-b

v3:
- Add bindings document as an 'F:' line.
- Add Steven and Liviu as co-maintainers.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Maxime Ripard <mripard@kernel.org>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20240229162230.2634044-15-boris.brezillon@collabora.com
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 MAINTAINERS | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index aa3b947fb080..2b56b1bddd67 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1688,6 +1688,17 @@ F:	Documentation/gpu/panfrost.rst
 F:	drivers/gpu/drm/panfrost/
 F:	include/uapi/drm/panfrost_drm.h

+ARM MALI PANTHOR DRM DRIVER
+M:	Boris Brezillon <boris.brezillon@collabora.com>
+M:	Steven Price <steven.price@arm.com>
+M:	Liviu Dudau <liviu.dudau@arm.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Supported
+T:	git git://anongit.freedesktop.org/drm/drm-misc
+F:	Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml
+F:	drivers/gpu/drm/panthor/
+F:	include/uapi/drm/panthor_drm.h
+
 ARM MALI-DP DRM DRIVER
 M:	Liviu Dudau <liviu.dudau@arm.com>
 S:	Supported
--
2.42.0


From 690ff4b82374e1e1e950775bc630360a0e905836 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Thu, 25 Jan 2024 14:46:53 +0100
Subject: [PATCH 16/69] arm64: defconfig: support Mali CSF-based GPUs

Enable support for Mali CSF-based GPUs, which is found on recent
ARM SoCs, such as Rockchip or Mediatek.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 2c30d617e180..65e33174f813 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -907,6 +907,7 @@ CONFIG_DRM_MESON=m
 CONFIG_DRM_PL111=m
 CONFIG_DRM_LIMA=m
 CONFIG_DRM_PANFROST=m
+CONFIG_DRM_PANTHOR=m
 CONFIG_DRM_TIDSS=m
 CONFIG_DRM_POWERVR=m
 CONFIG_FB=y
--
2.42.0


From dff9b9ff5e1cbdd0e7c97eb289a59460a1462637 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 7 Aug 2023 17:30:58 +0200
Subject: [PATCH 17/69] arm64: dts: rockchip: rk3588: Add GPU nodes

Add Mali GPU Node to the RK3588 SoC DT including GPU clock
operating points

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 56 +++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 87b83c87bd55..89d40cff635f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -501,6 +501,62 @@ usb_host2_xhci: usb@fcd00000 {
 		status = "disabled";
 	};

+	gpu: gpu@fb000000 {
+		compatible = "rockchip,rk3588-mali", "arm,mali-valhall-csf";
+		reg = <0x0 0xfb000000 0x0 0x200000>;
+		#cooling-cells = <2>;
+		assigned-clocks = <&scmi_clk SCMI_CLK_GPU>;
+		assigned-clock-rates = <200000000>;
+		clocks = <&cru CLK_GPU>, <&cru CLK_GPU_COREGROUP>,
+			 <&cru CLK_GPU_STACKS>;
+		clock-names = "core", "coregroup", "stacks";
+		dynamic-power-coefficient = <2982>;
+		interrupts = <GIC_SPI 92 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH 0>;
+		interrupt-names = "job", "mmu", "gpu";
+		operating-points-v2 = <&gpu_opp_table>;
+		power-domains = <&power RK3588_PD_GPU>;
+		status = "disabled";
+
+		gpu_opp_table: opp-table {
+			compatible = "operating-points-v2";
+
+			opp-300000000 {
+				opp-hz = /bits/ 64 <300000000>;
+				opp-microvolt = <675000 675000 850000>;
+			};
+			opp-400000000 {
+				opp-hz = /bits/ 64 <400000000>;
+				opp-microvolt = <675000 675000 850000>;
+			};
+			opp-500000000 {
+				opp-hz = /bits/ 64 <500000000>;
+				opp-microvolt = <675000 675000 850000>;
+			};
+			opp-600000000 {
+				opp-hz = /bits/ 64 <600000000>;
+				opp-microvolt = <675000 675000 850000>;
+			};
+			opp-700000000 {
+				opp-hz = /bits/ 64 <700000000>;
+				opp-microvolt = <700000 700000 850000>;
+			};
+			opp-800000000 {
+				opp-hz = /bits/ 64 <800000000>;
+				opp-microvolt = <750000 750000 850000>;
+			};
+			opp-900000000 {
+				opp-hz = /bits/ 64 <900000000>;
+				opp-microvolt = <800000 800000 850000>;
+			};
+			opp-1000000000 {
+				opp-hz = /bits/ 64 <1000000000>;
+				opp-microvolt = <850000 850000 850000>;
+			};
+		};
+	};
+
 	pmu1grf: syscon@fd58a000 {
 		compatible = "rockchip,rk3588-pmugrf", "syscon", "simple-mfd";
 		reg = <0x0 0xfd58a000 0x0 0x10000>;
--
2.42.0


From a34ac8a92db3912ee8eadd2fe64240369f2089eb Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Tue, 8 Aug 2023 12:05:22 +0200
Subject: [PATCH 18/69] arm64: dts: rockchip: rk3588-rock5b: Enable GPU

Enable the Mali GPU in the Rock 5B.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index 1fe8b2a0ed75..d6bf2ee07e87 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -180,6 +180,11 @@ &cpu_l3 {
 	cpu-supply = <&vdd_cpu_lit_s0>;
 };

+&gpu {
+	mali-supply = <&vdd_gpu_s0>;
+	status = "okay";
+};
+
 &i2c0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0m2_xfer>;
--
2.42.0


From 40e530bfccda818c2e1c8eca1f9dfe0f414b3ff6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@collabora.com>
Date: Mon, 7 Aug 2023 17:36:22 +0200
Subject: [PATCH 19/69] arm64: dts: rockchip: rk3588-evb1: Enable GPU

Enable the Mali GPU in the RK3588 EVB1.

This marks the GPU regulators as always-on, because the generic
coupler regulator logic from the kernel can only handle them
when they are marked as always-on. Technically it's okay to
disable the regulators, when the GPU is not used.

Considering the RK3588 EVB1 is not battery powered, the slightly
increased power consumption for keeping the regulator always
enabled is not a big deal. Thus it's better to enable GPU support
than wait for a better solution.

Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index de30c2632b8e..56c019b25fa8 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -281,6 +281,12 @@ &gmac0_rgmii_clk
 	status = "okay";
 };

+&gpu {
+	mali-supply = <&vdd_gpu_s0>;
+	sram-supply = <&vdd_gpu_mem_s0>;
+	status = "okay";
+};
+
 &i2c2 {
 	status = "okay";

@@ -484,12 +490,16 @@ rk806_dvs3_null: dvs3-null-pins {

 		regulators {
 			vdd_gpu_s0: dcdc-reg1 {
+				/* regulator coupling requires always-on */
+				regulator-always-on;
 				regulator-boot-on;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
 				regulator-name = "vdd_gpu_s0";
 				regulator-enable-ramp-delay = <400>;
+				regulator-coupled-with = <&vdd_gpu_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-state-mem {
 					regulator-off-in-suspend;
 				};
@@ -534,12 +544,16 @@ regulator-state-mem {
 			};

 			vdd_gpu_mem_s0: dcdc-reg5 {
+				/* regulator coupling requires always-on */
+				regulator-always-on;
 				regulator-boot-on;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
 				regulator-enable-ramp-delay = <400>;
 				regulator-name = "vdd_gpu_mem_s0";
+				regulator-coupled-with = <&vdd_gpu_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-state-mem {
 					regulator-off-in-suspend;
 				};
--
2.42.0


From 5c55891831f244cf2dde12055b1da8738094fef4 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 24 Oct 2023 16:09:35 +0200
Subject: [PATCH 20/69] math.h: add DIV_ROUND_UP_NO_OVERFLOW

Add a new DIV_ROUND_UP helper, which cannot overflow when
big numbers are being used.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/math.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/math.h b/include/linux/math.h
index dd4152711de7..f80bfb375ab9 100644
--- a/include/linux/math.h
+++ b/include/linux/math.h
@@ -36,6 +36,17 @@

 #define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP

+/**
+ * DIV_ROUND_UP_NO_OVERFLOW - divide two numbers and always round up
+ * @n: numerator / dividend
+ * @d: denominator / divisor
+ *
+ * This functions does the same as DIV_ROUND_UP, but internally uses a
+ * division and a modulo operation instead of math tricks. This way it
+ * avoids overflowing when handling big numbers.
+ */
+#define DIV_ROUND_UP_NO_OVERFLOW(n, d) (((n) / (d)) + !!((n) % (d)))
+
 #define DIV_ROUND_DOWN_ULL(ll, d) \
 	({ unsigned long long _tmp = (ll); do_div(_tmp, d); _tmp; })

--
2.42.0


From 81f3db1af2934b19df3dd6a7103db558ce6281da Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 24 Oct 2023 16:13:50 +0200
Subject: [PATCH 21/69] clk: divider: Fix divisor masking on 64 bit platforms

The clock framework handles clock rates as "unsigned long", so u32 on
32-bit architectures and u64 on 64-bit architectures.

The current code casts the dividend to u64 on 32-bit to avoid a
potential overflow. For example DIV_ROUND_UP(3000000000, 1500000000)
= (3.0G + 1.5G - 1) / 1.5G = = OVERFLOW / 1.5G, which has been
introduced in commit 9556f9dad8f5 ("clk: divider: handle integer overflow
when dividing large clock rates").

On 64 bit platforms this masks the divisor, so that only the lower
32 bit are used. Thus requesting a frequency >= 4.3GHz results
in incorrect values. For example requesting 4300000000 (4.3 GHz) will
effectively request ca. 5 MHz. Requesting clk_round_rate(clk, ULONG_MAX)
is a bit of a special case, since that still returns correct values as
long as the parent clock is below 8.5 GHz.

Fix this by switching to DIV_ROUND_UP_NO_OVERFLOW, which cannot
overflow. This avoids any requirements on the arguments (except
that divisor should not be 0 obviously).

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/clk-divider.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index a2c2b5203b0a..94b4fb66a60f 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -220,7 +220,7 @@ static int _div_round_up(const struct clk_div_table *table,
 			 unsigned long parent_rate, unsigned long rate,
 			 unsigned long flags)
 {
-	int div = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+	int div = DIV_ROUND_UP_NO_OVERFLOW(parent_rate, rate);

 	if (flags & CLK_DIVIDER_POWER_OF_TWO)
 		div = __roundup_pow_of_two(div);
@@ -237,7 +237,7 @@ static int _div_round_closest(const struct clk_div_table *table,
 	int up, down;
 	unsigned long up_rate, down_rate;

-	up = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+	up = DIV_ROUND_UP_NO_OVERFLOW(parent_rate, rate);
 	down = parent_rate / rate;

 	if (flags & CLK_DIVIDER_POWER_OF_TWO) {
@@ -473,7 +473,7 @@ int divider_get_val(unsigned long rate, unsigned long parent_rate,
 {
 	unsigned int div, value;

-	div = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+	div = DIV_ROUND_UP_NO_OVERFLOW(parent_rate, rate);

 	if (!_is_valid_div(table, div, flags))
 		return -EINVAL;
--
2.42.0


From 46254b2d8f2fb027df8597641878069ca11a7f11 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 24 Oct 2023 18:09:57 +0200
Subject: [PATCH 22/69] clk: composite: replace open-coded abs_diff()

Replace the open coded abs_diff() with the existing helper function.

Suggested-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/clk-composite.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index 66759fe28fad..478a4e594336 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c
@@ -6,6 +6,7 @@
 #include <linux/clk-provider.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/math.h>
 #include <linux/slab.h>

 static u8 clk_composite_get_parent(struct clk_hw *hw)
@@ -119,10 +120,7 @@ static int clk_composite_determine_rate(struct clk_hw *hw,
 			if (ret)
 				continue;

-			if (req->rate >= tmp_req.rate)
-				rate_diff = req->rate - tmp_req.rate;
-			else
-				rate_diff = tmp_req.rate - req->rate;
+			rate_diff = abs_diff(req->rate, tmp_req.rate);

 			if (!rate_diff || !req->best_parent_hw
 				       || best_rate_diff > rate_diff) {
--
2.42.0


From 9e061b100ccc3e2000ca51c43e53837b18f7b66b Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Apr 2023 17:38:57 +0200
Subject: [PATCH 23/69] dt-bindings: phy: add rockchip usbdp combo phy document

Add device tree binding document for Rockchip USBDP Combo PHY
with Samsung IP block.

Co-developed-by: Frank Wang <frank.wang@rock-chips.com>
Signed-off-by: Frank Wang <frank.wang@rock-chips.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../bindings/phy/phy-rockchip-usbdp.yaml      | 148 ++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml b/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml
new file mode 100644
index 000000000000..1f1f8863b80d
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-rockchip-usbdp.yaml
@@ -0,0 +1,148 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/phy-rockchip-usbdp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip USBDP Combo PHY with Samsung IP block
+
+maintainers:
+  - Frank Wang <frank.wang@rock-chips.com>
+  - Zhang Yubing <yubing.zhang@rock-chips.com>
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3588-usbdp-phy
+
+  reg:
+    maxItems: 1
+
+  "#phy-cells":
+    description: |
+      Cell allows setting the type of the PHY. Possible values are:
+      - PHY_TYPE_USB3
+      - PHY_TYPE_DP
+    const: 1
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: refclk
+      - const: immortal
+      - const: pclk
+      - const: utmi
+
+  resets:
+    maxItems: 5
+
+  reset-names:
+    items:
+      - const: init
+      - const: cmn
+      - const: lane
+      - const: pcs_apb
+      - const: pma_apb
+
+  rockchip,dp-lane-mux:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 2
+    maxItems: 4
+    items:
+      maximum: 3
+    description:
+      An array of physical Type-C lanes indexes. Position of an entry
+      determines the DisplayPort (DP) lane index, while the value of an entry
+      indicates physical Type-C lane. The supported DP lanes number are 2 or 4.
+      e.g. for 2 lanes DP lanes map, we could have "rockchip,dp-lane-mux = <2,
+      3>;", assuming DP lane0 on Type-C phy lane2, DP lane1 on Type-C phy
+      lane3. For 4 lanes DP lanes map, we could have "rockchip,dp-lane-mux =
+      <0, 1, 2, 3>;", assuming DP lane0 on Type-C phy lane0, DP lane1 on Type-C
+      phy lane1, DP lane2 on Type-C phy lane2, DP lane3 on Type-C phy lane3. If
+      DP lanes are mapped by DisplayPort Alt mode, this property is not needed.
+
+  rockchip,u2phy-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the 'usb2 phy general register files'.
+
+  rockchip,usb-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the 'usb general register files'.
+
+  rockchip,usbdpphy-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the 'usbdp phy general register files'.
+
+  rockchip,vo-grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the syscon managing the 'video output general register files'.
+      When select the DP lane mapping will request its phandle.
+
+  sbu1-dc-gpios:
+    description:
+      GPIO connected to the SBU1 line of the USB-C connector via a big resistor
+      (~100K) to apply a DC offset for signalling the connector orientation.
+    maxItems: 1
+
+  sbu2-dc-gpios:
+    description:
+      GPIO connected to the SBU2 line of the USB-C connector via a big resistor
+      (~100K) to apply a DC offset for signalling the connector orientation.
+    maxItems: 1
+
+  orientation-switch:
+    description: Flag the port as possible handler of orientation switching
+    type: boolean
+
+  mode-switch:
+    description: Flag the port as possible handler of altmode switching
+    type: boolean
+
+  port:
+    $ref: /schemas/graph.yaml#/properties/port
+    description:
+      A port node to link the PHY to a TypeC controller for the purpose of
+      handling orientation switching.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
+
+    usbdp_phy0: phy@fed80000 {
+      compatible = "rockchip,rk3588-usbdp-phy";
+      reg = <0xfed80000 0x10000>;
+      #phy-cells = <1>;
+      clocks = <&cru CLK_USBDPPHY_MIPIDCPPHY_REF>,
+               <&cru CLK_USBDP_PHY0_IMMORTAL>,
+               <&cru PCLK_USBDPPHY0>,
+               <&u2phy0>;
+      clock-names = "refclk", "immortal", "pclk", "utmi";
+      resets = <&cru SRST_USBDP_COMBO_PHY0_INIT>,
+               <&cru SRST_USBDP_COMBO_PHY0_CMN>,
+               <&cru SRST_USBDP_COMBO_PHY0_LANE>,
+               <&cru SRST_USBDP_COMBO_PHY0_PCS>,
+               <&cru SRST_P_USBDPPHY0>;
+      reset-names = "init", "cmn", "lane", "pcs_apb", "pma_apb";
+      rockchip,u2phy-grf = <&usb2phy0_grf>;
+      rockchip,usb-grf = <&usb_grf>;
+      rockchip,usbdpphy-grf = <&usbdpphy0_grf>;
+      rockchip,vo-grf = <&vo0_grf>;
+    };
--
2.42.0


From fbe0fb8446cf7479f3c9b35552eef78f6a3d8b65 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Apr 2023 15:55:54 +0200
Subject: [PATCH 24/69] phy: rockchip: add usbdp combo phy driver

This adds a new USBDP combo PHY with Samsung IP block driver.

The driver get lane mux and mapping info in 2 ways, supporting
DisplayPort alternate mode or parsing from DT. When parsing from DT,
the property "rockchip,dp-lane-mux" provide the DP mux and mapping
info. This is needed when the PHY is not used with TypeC Alt-Mode.
For example if the USB3 interface of the PHY is connected to a USB
Type A connector and the DP interface is connected to a DisplayPort
connector.

When do DP link training, need to set lane number, link rate, swing,
and pre-emphasis via PHY configure interface.

Co-developed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Co-developed-by: Zhang Yubing <yubing.zhang@rock-chips.com>
Signed-off-by: Zhang Yubing <yubing.zhang@rock-chips.com>
Co-developed-by: Frank Wang <frank.wang@rock-chips.com>
Signed-off-by: Frank Wang <frank.wang@rock-chips.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/phy/rockchip/Kconfig              |   12 +
 drivers/phy/rockchip/Makefile             |    1 +
 drivers/phy/rockchip/phy-rockchip-usbdp.c | 1612 +++++++++++++++++++++
 3 files changed, 1625 insertions(+)

diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig
index a34f67bb7e61..c3d62243b474 100644
--- a/drivers/phy/rockchip/Kconfig
+++ b/drivers/phy/rockchip/Kconfig
@@ -115,3 +115,15 @@ config PHY_ROCKCHIP_USB
 	select GENERIC_PHY
 	help
 	  Enable this to support the Rockchip USB 2.0 PHY.
+
+config PHY_ROCKCHIP_USBDP
+	tristate "Rockchip USBDP COMBO PHY Driver"
+	depends on ARCH_ROCKCHIP && OF
+	select GENERIC_PHY
+	select TYPEC
+	help
+	  Enable this to support the Rockchip USB3.0/DP combo PHY with
+	  Samsung IP block. This is required for USB3 support on RK3588.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called phy-rockchip-usbdp
diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile
index 3d911304e654..010a824e32ce 100644
--- a/drivers/phy/rockchip/Makefile
+++ b/drivers/phy/rockchip/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX)	+= phy-rockchip-samsung-hdptx.o
 obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3)	+= phy-rockchip-snps-pcie3.o
 obj-$(CONFIG_PHY_ROCKCHIP_TYPEC)	+= phy-rockchip-typec.o
 obj-$(CONFIG_PHY_ROCKCHIP_USB)		+= phy-rockchip-usb.o
+obj-$(CONFIG_PHY_ROCKCHIP_USBDP)	+= phy-rockchip-usbdp.o
diff --git a/drivers/phy/rockchip/phy-rockchip-usbdp.c b/drivers/phy/rockchip/phy-rockchip-usbdp.c
new file mode 100644
index 000000000000..38dc96cfe403
--- /dev/null
+++ b/drivers/phy/rockchip/phy-rockchip-usbdp.c
@@ -0,0 +1,1612 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Rockchip USBDP Combo PHY with Samsung IP block driver
+ *
+ * Copyright (C) 2021 Rockchip Electronics Co., Ltd
+ */
+
+#include <dt-bindings/phy/phy.h>
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/usb/ch9.h>
+#include <linux/usb/typec_dp.h>
+#include <linux/usb/typec_mux.h>
+
+/* USBDP PHY Register Definitions */
+#define UDPHY_PCS				0x4000
+#define UDPHY_PMA				0x8000
+
+/* VO0 GRF Registers */
+#define DP_SINK_HPD_CFG				BIT(11)
+#define DP_SINK_HPD_SEL				BIT(10)
+#define DP_AUX_DIN_SEL				BIT(9)
+#define DP_AUX_DOUT_SEL				BIT(8)
+#define DP_LANE_SEL_N(n)			GENMASK(2 * (n) + 1, 2 * (n))
+#define DP_LANE_SEL_ALL				GENMASK(7, 0)
+
+/* PMA CMN Registers */
+#define CMN_LANE_MUX_AND_EN_OFFSET		0x0288	/* cmn_reg00A2 */
+#define CMN_DP_LANE_MUX_N(n)			BIT((n) + 4)
+#define CMN_DP_LANE_EN_N(n)			BIT(n)
+#define CMN_DP_LANE_MUX_ALL			GENMASK(7, 4)
+#define CMN_DP_LANE_EN_ALL			GENMASK(3, 0)
+
+#define CMN_DP_LINK_OFFSET			0x28c	/* cmn_reg00A3 */
+#define CMN_DP_TX_LINK_BW			GENMASK(6, 5)
+#define CMN_DP_TX_LANE_SWAP_EN			BIT(2)
+
+#define CMN_SSC_EN_OFFSET			0x2d0	/* cmn_reg00B4 */
+#define CMN_ROPLL_SSC_EN			BIT(1)
+#define CMN_LCPLL_SSC_EN			BIT(0)
+
+#define CMN_ANA_LCPLL_DONE_OFFSET		0x0350	/* cmn_reg00D4 */
+#define CMN_ANA_LCPLL_LOCK_DONE			BIT(7)
+#define CMN_ANA_LCPLL_AFC_DONE			BIT(6)
+
+#define CMN_ANA_ROPLL_DONE_OFFSET		0x0354	/* cmn_reg00D5 */
+#define CMN_ANA_ROPLL_LOCK_DONE			BIT(1)
+#define CMN_ANA_ROPLL_AFC_DONE			BIT(0)
+
+#define CMN_DP_RSTN_OFFSET			0x038c	/* cmn_reg00E3 */
+#define CMN_DP_INIT_RSTN			BIT(3)
+#define CMN_DP_CMN_RSTN				BIT(2)
+#define CMN_CDR_WTCHDG_EN			BIT(1)
+#define CMN_CDR_WTCHDG_MSK_CDR_EN		BIT(0)
+
+#define TRSV_ANA_TX_CLK_OFFSET_N(n)		(0x854 + (n) * 0x800)	/* trsv_reg0215 */
+#define LN_ANA_TX_SER_TXCLK_INV			BIT(1)
+
+#define TRSV_LN0_MON_RX_CDR_DONE_OFFSET		0x0b84	/* trsv_reg02E1 */
+#define TRSV_LN0_MON_RX_CDR_LOCK_DONE		BIT(0)
+
+#define TRSV_LN2_MON_RX_CDR_DONE_OFFSET		0x1b84	/* trsv_reg06E1 */
+#define TRSV_LN2_MON_RX_CDR_LOCK_DONE		BIT(0)
+
+#define BIT_WRITEABLE_SHIFT			16
+#define PHY_AUX_DP_DATA_POL_NORMAL		0
+#define PHY_AUX_DP_DATA_POL_INVERT		1
+#define PHY_LANE_MUX_USB			0
+#define PHY_LANE_MUX_DP				1
+
+enum {
+	DP_BW_RBR,
+	DP_BW_HBR,
+	DP_BW_HBR2,
+	DP_BW_HBR3,
+};
+
+enum {
+	UDPHY_MODE_NONE		= 0,
+	UDPHY_MODE_USB		= BIT(0),
+	UDPHY_MODE_DP		= BIT(1),
+	UDPHY_MODE_DP_USB	= BIT(1) | BIT(0),
+};
+
+struct rk_udphy_grf_reg {
+	unsigned int	offset;
+	unsigned int	disable;
+	unsigned int	enable;
+};
+
+#define _RK_UDPHY_GEN_GRF_REG(offset, mask, disable, enable) \
+{\
+	offset, \
+	FIELD_PREP_CONST(mask, disable) | (mask << BIT_WRITEABLE_SHIFT), \
+	FIELD_PREP_CONST(mask, enable) | (mask << BIT_WRITEABLE_SHIFT), \
+}
+
+#define RK_UDPHY_GEN_GRF_REG(offset, bitend, bitstart, disable, enable) \
+	_RK_UDPHY_GEN_GRF_REG(offset, GENMASK(bitend, bitstart), disable, enable)
+
+struct rk_udphy_grf_cfg {
+	/* u2phy-grf */
+	struct rk_udphy_grf_reg	bvalid_phy_con;
+	struct rk_udphy_grf_reg	bvalid_grf_con;
+
+	/* usb-grf */
+	struct rk_udphy_grf_reg	usb3otg0_cfg;
+	struct rk_udphy_grf_reg	usb3otg1_cfg;
+
+	/* usbdpphy-grf */
+	struct rk_udphy_grf_reg	low_pwrn;
+	struct rk_udphy_grf_reg	rx_lfps;
+};
+
+struct rk_udphy_vogrf_cfg {
+	/* vo-grf */
+	struct rk_udphy_grf_reg hpd_trigger;
+	u32 dp_lane_reg;
+};
+
+struct rk_udphy_dp_tx_drv_ctrl {
+	u32 trsv_reg0204;
+	u32 trsv_reg0205;
+	u32 trsv_reg0206;
+	u32 trsv_reg0207;
+};
+
+struct rk_udphy_cfg {
+	unsigned int num_phys;
+	unsigned int phy_ids[2];
+	/* resets to be requested */
+	const char * const *rst_list;
+	int num_rsts;
+
+	struct rk_udphy_grf_cfg grfcfg;
+	struct rk_udphy_vogrf_cfg vogrfcfg[2];
+	const struct rk_udphy_dp_tx_drv_ctrl (*dp_tx_ctrl_cfg[4])[4];
+	const struct rk_udphy_dp_tx_drv_ctrl (*dp_tx_ctrl_cfg_typec[4])[4];
+};
+
+struct rk_udphy {
+	struct device *dev;
+	struct regmap *pma_regmap;
+	struct regmap *u2phygrf;
+	struct regmap *udphygrf;
+	struct regmap *usbgrf;
+	struct regmap *vogrf;
+	struct typec_switch_dev *sw;
+	struct typec_mux_dev *mux;
+	struct mutex mutex; /* mutex to protect access to individual PHYs */
+
+	/* clocks and rests */
+	int num_clks;
+	struct clk_bulk_data *clks;
+	struct clk *refclk;
+	int num_rsts;
+	struct reset_control_bulk_data *rsts;
+
+	/* PHY status management */
+	bool flip;
+	bool mode_change;
+	u8 mode;
+	u8 status;
+
+	/* utilized for USB */
+	bool hs; /* flag for high-speed */
+
+	/* utilized for DP */
+	struct gpio_desc *sbu1_dc_gpio;
+	struct gpio_desc *sbu2_dc_gpio;
+	u32 lane_mux_sel[4];
+	u32 dp_lane_sel[4];
+	u32 dp_aux_dout_sel;
+	u32 dp_aux_din_sel;
+	bool dp_sink_hpd_sel;
+	bool dp_sink_hpd_cfg;
+	u8 bw;
+	int id;
+
+	bool dp_in_use;
+
+	/* PHY const config */
+	const struct rk_udphy_cfg *cfgs;
+
+	/* PHY devices */
+	struct phy *phy_dp;
+	struct phy *phy_u3;
+};
+
+static const struct rk_udphy_dp_tx_drv_ctrl rk3588_dp_tx_drv_ctrl_rbr_hbr[4][4] = {
+	/* voltage swing 0, pre-emphasis 0->3 */
+	{
+		{ 0x20, 0x10, 0x42, 0xe5 },
+		{ 0x26, 0x14, 0x42, 0xe5 },
+		{ 0x29, 0x18, 0x42, 0xe5 },
+		{ 0x2b, 0x1c, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 1, pre-emphasis 0->2 */
+	{
+		{ 0x23, 0x10, 0x42, 0xe7 },
+		{ 0x2a, 0x17, 0x43, 0xe7 },
+		{ 0x2b, 0x1a, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 2, pre-emphasis 0->1 */
+	{
+		{ 0x27, 0x10, 0x42, 0xe7 },
+		{ 0x2b, 0x17, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 3, pre-emphasis 0 */
+	{
+		{ 0x29, 0x10, 0x43, 0xe7 },
+	},
+};
+
+static const struct rk_udphy_dp_tx_drv_ctrl rk3588_dp_tx_drv_ctrl_rbr_hbr_typec[4][4] = {
+	/* voltage swing 0, pre-emphasis 0->3 */
+	{
+		{ 0x20, 0x10, 0x42, 0xe5 },
+		{ 0x26, 0x14, 0x42, 0xe5 },
+		{ 0x29, 0x18, 0x42, 0xe5 },
+		{ 0x2b, 0x1c, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 1, pre-emphasis 0->2 */
+	{
+		{ 0x23, 0x10, 0x42, 0xe7 },
+		{ 0x2a, 0x17, 0x43, 0xe7 },
+		{ 0x2b, 0x1a, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 2, pre-emphasis 0->1 */
+	{
+		{ 0x27, 0x10, 0x43, 0x67 },
+		{ 0x2b, 0x17, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 3, pre-emphasis 0 */
+	{
+		{ 0x29, 0x10, 0x43, 0xe7 },
+	},
+};
+
+static const struct rk_udphy_dp_tx_drv_ctrl rk3588_dp_tx_drv_ctrl_hbr2[4][4] = {
+	/* voltage swing 0, pre-emphasis 0->3 */
+	{
+		{ 0x21, 0x10, 0x42, 0xe5 },
+		{ 0x26, 0x14, 0x42, 0xe5 },
+		{ 0x26, 0x16, 0x43, 0xe5 },
+		{ 0x2a, 0x19, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 1, pre-emphasis 0->2 */
+	{
+		{ 0x24, 0x10, 0x42, 0xe7 },
+		{ 0x2a, 0x17, 0x43, 0xe7 },
+		{ 0x2b, 0x1a, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 2, pre-emphasis 0->1 */
+	{
+		{ 0x28, 0x10, 0x42, 0xe7 },
+		{ 0x2b, 0x17, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 3, pre-emphasis 0 */
+	{
+		{ 0x28, 0x10, 0x43, 0xe7 },
+	},
+};
+
+static const struct rk_udphy_dp_tx_drv_ctrl rk3588_dp_tx_drv_ctrl_hbr3[4][4] = {
+	/* voltage swing 0, pre-emphasis 0->3 */
+	{
+		{ 0x21, 0x10, 0x42, 0xe5 },
+		{ 0x26, 0x14, 0x42, 0xe5 },
+		{ 0x26, 0x16, 0x43, 0xe5 },
+		{ 0x29, 0x18, 0x43, 0xe7 },
+	},
+
+	/* voltage swing 1, pre-emphasis 0->2 */
+	{
+		{ 0x24, 0x10, 0x42, 0xe7 },
+		{ 0x2a, 0x18, 0x43, 0xe7 },
+		{ 0x2b, 0x1b, 0x43, 0xe7 }
+	},
+
+	/* voltage swing 2, pre-emphasis 0->1 */
+	{
+		{ 0x27, 0x10, 0x42, 0xe7 },
+		{ 0x2b, 0x18, 0x43, 0xe7 }
+	},
+
+	/* voltage swing 3, pre-emphasis 0 */
+	{
+		{ 0x28, 0x10, 0x43, 0xe7 },
+	},
+};
+
+static const struct reg_sequence rk_udphy_24m_refclk_cfg[] = {
+	{0x0090, 0x68}, {0x0094, 0x68},
+	{0x0128, 0x24}, {0x012c, 0x44},
+	{0x0130, 0x3f}, {0x0134, 0x44},
+	{0x015c, 0xa9}, {0x0160, 0x71},
+	{0x0164, 0x71}, {0x0168, 0xa9},
+	{0x0174, 0xa9}, {0x0178, 0x71},
+	{0x017c, 0x71}, {0x0180, 0xa9},
+	{0x018c, 0x41}, {0x0190, 0x00},
+	{0x0194, 0x05}, {0x01ac, 0x2a},
+	{0x01b0, 0x17}, {0x01b4, 0x17},
+	{0x01b8, 0x2a}, {0x01c8, 0x04},
+	{0x01cc, 0x08}, {0x01d0, 0x08},
+	{0x01d4, 0x04}, {0x01d8, 0x20},
+	{0x01dc, 0x01}, {0x01e0, 0x09},
+	{0x01e4, 0x03}, {0x01f0, 0x29},
+	{0x01f4, 0x02}, {0x01f8, 0x02},
+	{0x01fc, 0x29}, {0x0208, 0x2a},
+	{0x020c, 0x17}, {0x0210, 0x17},
+	{0x0214, 0x2a}, {0x0224, 0x20},
+	{0x03f0, 0x0a}, {0x03f4, 0x07},
+	{0x03f8, 0x07}, {0x03fc, 0x0c},
+	{0x0404, 0x12}, {0x0408, 0x1a},
+	{0x040c, 0x1a}, {0x0410, 0x3f},
+	{0x0ce0, 0x68}, {0x0ce8, 0xd0},
+	{0x0cf0, 0x87}, {0x0cf8, 0x70},
+	{0x0d00, 0x70}, {0x0d08, 0xa9},
+	{0x1ce0, 0x68}, {0x1ce8, 0xd0},
+	{0x1cf0, 0x87}, {0x1cf8, 0x70},
+	{0x1d00, 0x70}, {0x1d08, 0xa9},
+	{0x0a3c, 0xd0}, {0x0a44, 0xd0},
+	{0x0a48, 0x01}, {0x0a4c, 0x0d},
+	{0x0a54, 0xe0}, {0x0a5c, 0xe0},
+	{0x0a64, 0xa8}, {0x1a3c, 0xd0},
+	{0x1a44, 0xd0}, {0x1a48, 0x01},
+	{0x1a4c, 0x0d}, {0x1a54, 0xe0},
+	{0x1a5c, 0xe0}, {0x1a64, 0xa8}
+};
+
+static const struct reg_sequence rk_udphy_26m_refclk_cfg[] = {
+	{0x0830, 0x07}, {0x085c, 0x80},
+	{0x1030, 0x07}, {0x105c, 0x80},
+	{0x1830, 0x07}, {0x185c, 0x80},
+	{0x2030, 0x07}, {0x205c, 0x80},
+	{0x0228, 0x38}, {0x0104, 0x44},
+	{0x0248, 0x44}, {0x038c, 0x02},
+	{0x0878, 0x04}, {0x1878, 0x04},
+	{0x0898, 0x77}, {0x1898, 0x77},
+	{0x0054, 0x01}, {0x00e0, 0x38},
+	{0x0060, 0x24}, {0x0064, 0x77},
+	{0x0070, 0x76}, {0x0234, 0xe8},
+	{0x0af4, 0x15}, {0x1af4, 0x15},
+	{0x081c, 0xe5}, {0x181c, 0xe5},
+	{0x099c, 0x48}, {0x199c, 0x48},
+	{0x09a4, 0x07}, {0x09a8, 0x22},
+	{0x19a4, 0x07}, {0x19a8, 0x22},
+	{0x09b8, 0x3e}, {0x19b8, 0x3e},
+	{0x09e4, 0x02}, {0x19e4, 0x02},
+	{0x0a34, 0x1e}, {0x1a34, 0x1e},
+	{0x0a98, 0x2f}, {0x1a98, 0x2f},
+	{0x0c30, 0x0e}, {0x0c48, 0x06},
+	{0x1c30, 0x0e}, {0x1c48, 0x06},
+	{0x028c, 0x18}, {0x0af0, 0x00},
+	{0x1af0, 0x00}
+};
+
+static const struct reg_sequence rk_udphy_init_sequence[] = {
+	{0x0104, 0x44}, {0x0234, 0xe8},
+	{0x0248, 0x44}, {0x028c, 0x18},
+	{0x081c, 0xe5}, {0x0878, 0x00},
+	{0x0994, 0x1c}, {0x0af0, 0x00},
+	{0x181c, 0xe5}, {0x1878, 0x00},
+	{0x1994, 0x1c}, {0x1af0, 0x00},
+	{0x0428, 0x60}, {0x0d58, 0x33},
+	{0x1d58, 0x33}, {0x0990, 0x74},
+	{0x0d64, 0x17}, {0x08c8, 0x13},
+	{0x1990, 0x74}, {0x1d64, 0x17},
+	{0x18c8, 0x13}, {0x0d90, 0x40},
+	{0x0da8, 0x40}, {0x0dc0, 0x40},
+	{0x0dd8, 0x40}, {0x1d90, 0x40},
+	{0x1da8, 0x40}, {0x1dc0, 0x40},
+	{0x1dd8, 0x40}, {0x03c0, 0x30},
+	{0x03c4, 0x06}, {0x0e10, 0x00},
+	{0x1e10, 0x00}, {0x043c, 0x0f},
+	{0x0d2c, 0xff}, {0x1d2c, 0xff},
+	{0x0d34, 0x0f}, {0x1d34, 0x0f},
+	{0x08fc, 0x2a}, {0x0914, 0x28},
+	{0x0a30, 0x03}, {0x0e38, 0x03},
+	{0x0ecc, 0x27}, {0x0ed0, 0x22},
+	{0x0ed4, 0x26}, {0x18fc, 0x2a},
+	{0x1914, 0x28}, {0x1a30, 0x03},
+	{0x1e38, 0x03}, {0x1ecc, 0x27},
+	{0x1ed0, 0x22}, {0x1ed4, 0x26},
+	{0x0048, 0x0f}, {0x0060, 0x3c},
+	{0x0064, 0xf7}, {0x006c, 0x20},
+	{0x0070, 0x7d}, {0x0074, 0x68},
+	{0x0af4, 0x1a}, {0x1af4, 0x1a},
+	{0x0440, 0x3f}, {0x10d4, 0x08},
+	{0x20d4, 0x08}, {0x00d4, 0x30},
+	{0x0024, 0x6e},
+};
+
+static inline int rk_udphy_grfreg_write(struct regmap *base,
+					const struct rk_udphy_grf_reg *reg, bool en)
+{
+	return regmap_write(base, reg->offset, en ? reg->enable : reg->disable);
+}
+
+static int rk_udphy_clk_init(struct rk_udphy *udphy, struct device *dev)
+{
+	int i;
+
+	udphy->num_clks = devm_clk_bulk_get_all(dev, &udphy->clks);
+	if (udphy->num_clks < 1)
+		return -ENODEV;
+
+	/* used for configure phy reference clock frequency */
+	for (i = 0; i < udphy->num_clks; i++) {
+		if (!strncmp(udphy->clks[i].id, "refclk", 6)) {
+			udphy->refclk = udphy->clks[i].clk;
+			break;
+		}
+	}
+
+	if (!udphy->refclk)
+		return dev_err_probe(udphy->dev, -EINVAL, "no refclk found\n");
+
+	return 0;
+}
+
+static int rk_udphy_reset_assert_all(struct rk_udphy *udphy)
+{
+	return reset_control_bulk_assert(udphy->num_rsts, udphy->rsts);
+}
+
+static int rk_udphy_reset_deassert_all(struct rk_udphy *udphy)
+{
+	return reset_control_bulk_deassert(udphy->num_rsts, udphy->rsts);
+}
+
+static int rk_udphy_reset_deassert(struct rk_udphy *udphy, char *name)
+{
+	struct reset_control_bulk_data *list = udphy->rsts;
+	int idx;
+
+	for (idx = 0; idx < udphy->num_rsts; idx++) {
+		if (!strcmp(list[idx].id, name))
+			return reset_control_deassert(list[idx].rstc);
+	}
+
+	return -EINVAL;
+}
+
+static int rk_udphy_reset_init(struct rk_udphy *udphy, struct device *dev)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+	int idx;
+
+	udphy->num_rsts = cfg->num_rsts;
+	udphy->rsts = devm_kcalloc(dev, udphy->num_rsts,
+				   sizeof(*udphy->rsts), GFP_KERNEL);
+	if (!udphy->rsts)
+		return -ENOMEM;
+
+	for (idx = 0; idx < cfg->num_rsts; idx++)
+		udphy->rsts[idx].id = cfg->rst_list[idx];
+
+	return devm_reset_control_bulk_get_exclusive(dev, cfg->num_rsts,
+						     udphy->rsts);
+}
+
+static void rk_udphy_u3_port_disable(struct rk_udphy *udphy, u8 disable)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+	const struct rk_udphy_grf_reg *preg;
+
+	preg = udphy->id ? &cfg->grfcfg.usb3otg1_cfg : &cfg->grfcfg.usb3otg0_cfg;
+	rk_udphy_grfreg_write(udphy->usbgrf, preg, disable);
+}
+
+static void rk_udphy_usb_bvalid_enable(struct rk_udphy *udphy, u8 enable)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+
+	rk_udphy_grfreg_write(udphy->u2phygrf, &cfg->grfcfg.bvalid_phy_con, enable);
+	rk_udphy_grfreg_write(udphy->u2phygrf, &cfg->grfcfg.bvalid_grf_con, enable);
+}
+
+/*
+ * In usb/dp combo phy driver, here are 2 ways to mapping lanes.
+ *
+ * 1 Type-C Mapping table (DP_Alt_Mode V1.0b remove ABF pin mapping)
+ * ---------------------------------------------------------------------------
+ * Type-C Pin   B11-B10       A2-A3       A11-A10       B2-B3
+ * PHY Pad      ln0(tx/rx)    ln1(tx)     ln2(tx/rx)    ln3(tx)
+ * C/E(Normal)  dpln3         dpln2       dpln0         dpln1
+ * C/E(Flip  )  dpln0         dpln1       dpln3         dpln2
+ * D/F(Normal)  usbrx         usbtx       dpln0         dpln1
+ * D/F(Flip  )  dpln0         dpln1       usbrx         usbtx
+ * A(Normal  )  dpln3         dpln1       dpln2         dpln0
+ * A(Flip    )  dpln2         dpln0       dpln3         dpln1
+ * B(Normal  )  usbrx         usbtx       dpln1         dpln0
+ * B(Flip    )  dpln1         dpln0       usbrx         usbtx
+ * ---------------------------------------------------------------------------
+ *
+ * 2 Mapping the lanes in dtsi
+ * if all 4 lane assignment for dp function, define rockchip,dp-lane-mux = <x x x x>;
+ * sample as follow:
+ * ---------------------------------------------------------------------------
+ *                        B11-B10       A2-A3       A11-A10       B2-B3
+ * rockchip,dp-lane-mux   ln0(tx/rx)    ln1(tx)     ln2(tx/rx)    ln3(tx)
+ * <0 1 2 3>              dpln0         dpln1       dpln2         dpln3
+ * <2 3 0 1>              dpln2         dpln3       dpln0         dpln1
+ * ---------------------------------------------------------------------------
+ * if 2 lane for dp function, 2 lane for usb function, define rockchip,dp-lane-mux = <x x>;
+ * sample as follow:
+ * ---------------------------------------------------------------------------
+ *                        B11-B10       A2-A3       A11-A10       B2-B3
+ * rockchip,dp-lane-mux   ln0(tx/rx)    ln1(tx)     ln2(tx/rx)    ln3(tx)
+ * <0 1>                  dpln0         dpln1       usbrx         usbtx
+ * <2 3>                  usbrx         usbtx       dpln0         dpln1
+ * ---------------------------------------------------------------------------
+ */
+
+static void rk_udphy_dplane_select(struct rk_udphy *udphy)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+	u32 value = 0;
+
+	switch (udphy->mode) {
+	case UDPHY_MODE_DP:
+		value |= 2 << udphy->dp_lane_sel[2] * 2;
+		value |= 3 << udphy->dp_lane_sel[3] * 2;
+		fallthrough;
+
+	case UDPHY_MODE_DP_USB:
+		value |= 0 << udphy->dp_lane_sel[0] * 2;
+		value |= 1 << udphy->dp_lane_sel[1] * 2;
+		break;
+
+	case UDPHY_MODE_USB:
+		break;
+
+	default:
+		break;
+	}
+
+	regmap_write(udphy->vogrf, cfg->vogrfcfg[udphy->id].dp_lane_reg,
+		     ((DP_AUX_DIN_SEL | DP_AUX_DOUT_SEL | DP_LANE_SEL_ALL) << 16) |
+		     FIELD_PREP(DP_AUX_DIN_SEL, udphy->dp_aux_din_sel) |
+		     FIELD_PREP(DP_AUX_DOUT_SEL, udphy->dp_aux_dout_sel) | value);
+}
+
+static int rk_udphy_dplane_get(struct rk_udphy *udphy)
+{
+	int dp_lanes;
+
+	switch (udphy->mode) {
+	case UDPHY_MODE_DP:
+		dp_lanes = 4;
+		break;
+
+	case UDPHY_MODE_DP_USB:
+		dp_lanes = 2;
+		break;
+
+	case UDPHY_MODE_USB:
+	default:
+		dp_lanes = 0;
+		break;
+	}
+
+	return dp_lanes;
+}
+
+static void rk_udphy_dplane_enable(struct rk_udphy *udphy, int dp_lanes)
+{
+	u32 val = 0;
+	int i;
+
+	for (i = 0; i < dp_lanes; i++)
+		val |= BIT(udphy->dp_lane_sel[i]);
+
+	regmap_update_bits(udphy->pma_regmap, CMN_LANE_MUX_AND_EN_OFFSET, CMN_DP_LANE_EN_ALL,
+			   FIELD_PREP(CMN_DP_LANE_EN_ALL, val));
+
+	if (!dp_lanes)
+		regmap_update_bits(udphy->pma_regmap, CMN_DP_RSTN_OFFSET,
+				   CMN_DP_CMN_RSTN, FIELD_PREP(CMN_DP_CMN_RSTN, 0x0));
+}
+
+static void rk_udphy_dp_hpd_event_trigger(struct rk_udphy *udphy, bool hpd)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+
+	udphy->dp_sink_hpd_sel = true;
+	udphy->dp_sink_hpd_cfg = hpd;
+
+	if (!udphy->dp_in_use)
+		return;
+
+	rk_udphy_grfreg_write(udphy->vogrf, &cfg->vogrfcfg[udphy->id].hpd_trigger, hpd);
+}
+
+static void rk_udphy_set_typec_default_mapping(struct rk_udphy *udphy)
+{
+	if (udphy->flip) {
+		udphy->dp_lane_sel[0] = 0;
+		udphy->dp_lane_sel[1] = 1;
+		udphy->dp_lane_sel[2] = 3;
+		udphy->dp_lane_sel[3] = 2;
+		udphy->lane_mux_sel[0] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[1] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[2] = PHY_LANE_MUX_USB;
+		udphy->lane_mux_sel[3] = PHY_LANE_MUX_USB;
+		udphy->dp_aux_dout_sel = PHY_AUX_DP_DATA_POL_INVERT;
+		udphy->dp_aux_din_sel = PHY_AUX_DP_DATA_POL_INVERT;
+		gpiod_set_value_cansleep(udphy->sbu1_dc_gpio, 1);
+		gpiod_set_value_cansleep(udphy->sbu2_dc_gpio, 0);
+	} else {
+		udphy->dp_lane_sel[0] = 2;
+		udphy->dp_lane_sel[1] = 3;
+		udphy->dp_lane_sel[2] = 1;
+		udphy->dp_lane_sel[3] = 0;
+		udphy->lane_mux_sel[0] = PHY_LANE_MUX_USB;
+		udphy->lane_mux_sel[1] = PHY_LANE_MUX_USB;
+		udphy->lane_mux_sel[2] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[3] = PHY_LANE_MUX_DP;
+		udphy->dp_aux_dout_sel = PHY_AUX_DP_DATA_POL_NORMAL;
+		udphy->dp_aux_din_sel = PHY_AUX_DP_DATA_POL_NORMAL;
+		gpiod_set_value_cansleep(udphy->sbu1_dc_gpio, 0);
+		gpiod_set_value_cansleep(udphy->sbu2_dc_gpio, 1);
+	}
+
+	udphy->mode = UDPHY_MODE_DP_USB;
+}
+
+static int rk_udphy_orien_sw_set(struct typec_switch_dev *sw,
+				 enum typec_orientation orien)
+{
+	struct rk_udphy *udphy = typec_switch_get_drvdata(sw);
+
+	mutex_lock(&udphy->mutex);
+
+	if (orien == TYPEC_ORIENTATION_NONE) {
+		gpiod_set_value_cansleep(udphy->sbu1_dc_gpio, 0);
+		gpiod_set_value_cansleep(udphy->sbu2_dc_gpio, 0);
+		/* unattached */
+		rk_udphy_usb_bvalid_enable(udphy, false);
+		goto unlock_ret;
+	}
+
+	udphy->flip = (orien == TYPEC_ORIENTATION_REVERSE) ? true : false;
+	rk_udphy_set_typec_default_mapping(udphy);
+	rk_udphy_usb_bvalid_enable(udphy, true);
+
+unlock_ret:
+	mutex_unlock(&udphy->mutex);
+	return 0;
+}
+
+static void rk_udphy_orien_switch_unregister(void *data)
+{
+	struct rk_udphy *udphy = data;
+
+	typec_switch_unregister(udphy->sw);
+}
+
+static int rk_udphy_setup_orien_switch(struct rk_udphy *udphy)
+{
+	struct typec_switch_desc sw_desc = { };
+
+	sw_desc.drvdata = udphy;
+	sw_desc.fwnode = dev_fwnode(udphy->dev);
+	sw_desc.set = rk_udphy_orien_sw_set;
+
+	udphy->sw = typec_switch_register(udphy->dev, &sw_desc);
+	if (IS_ERR(udphy->sw)) {
+		dev_err(udphy->dev, "Error register typec orientation switch: %ld\n",
+			PTR_ERR(udphy->sw));
+		return PTR_ERR(udphy->sw);
+	}
+
+	return devm_add_action_or_reset(udphy->dev,
+					rk_udphy_orien_switch_unregister, udphy);
+}
+
+static int rk_udphy_refclk_set(struct rk_udphy *udphy)
+{
+	unsigned long rate;
+	int ret;
+
+	/* configure phy reference clock */
+	rate = clk_get_rate(udphy->refclk);
+	dev_dbg(udphy->dev, "refclk freq %ld\n", rate);
+
+	switch (rate) {
+	case 24000000:
+		ret = regmap_multi_reg_write(udphy->pma_regmap, rk_udphy_24m_refclk_cfg,
+					     ARRAY_SIZE(rk_udphy_24m_refclk_cfg));
+		if (ret)
+			return ret;
+		break;
+
+	case 26000000:
+		/* register default is 26MHz */
+		ret = regmap_multi_reg_write(udphy->pma_regmap, rk_udphy_26m_refclk_cfg,
+					     ARRAY_SIZE(rk_udphy_26m_refclk_cfg));
+		if (ret)
+			return ret;
+		break;
+
+	default:
+		dev_err(udphy->dev, "unsupported refclk freq %ld\n", rate);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rk_udphy_status_check(struct rk_udphy *udphy)
+{
+	unsigned int val;
+	int ret;
+
+	/* LCPLL check */
+	if (udphy->mode & UDPHY_MODE_USB) {
+		ret = regmap_read_poll_timeout(udphy->pma_regmap, CMN_ANA_LCPLL_DONE_OFFSET,
+					       val, (val & CMN_ANA_LCPLL_AFC_DONE) &&
+					       (val & CMN_ANA_LCPLL_LOCK_DONE), 200, 100000);
+		if (ret) {
+			dev_err(udphy->dev, "cmn ana lcpll lock timeout\n");
+			/*
+			 * If earlier software (U-Boot) enabled USB once already
+			 * the PLL may have problems locking on the first try.
+			 * It will be successful on the second try, so for the
+			 * time being a -EPROBE_DEFER will solve the issue.
+			 *
+			 * This requires further investigation to understand the
+			 * root cause, especially considering that the driver is
+			 * asserting all reset lines at probe time.
+			 */
+			return -EPROBE_DEFER;
+		}
+
+		if (!udphy->flip) {
+			ret = regmap_read_poll_timeout(udphy->pma_regmap,
+						       TRSV_LN0_MON_RX_CDR_DONE_OFFSET, val,
+						       val & TRSV_LN0_MON_RX_CDR_LOCK_DONE,
+						       200, 100000);
+			if (ret)
+				dev_err(udphy->dev, "trsv ln0 mon rx cdr lock timeout\n");
+		} else {
+			ret = regmap_read_poll_timeout(udphy->pma_regmap,
+						       TRSV_LN2_MON_RX_CDR_DONE_OFFSET, val,
+						       val & TRSV_LN2_MON_RX_CDR_LOCK_DONE,
+						       200, 100000);
+			if (ret)
+				dev_err(udphy->dev, "trsv ln2 mon rx cdr lock timeout\n");
+		}
+	}
+
+	return 0;
+}
+
+static int rk_udphy_init(struct rk_udphy *udphy)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+	int ret;
+
+	rk_udphy_reset_assert_all(udphy);
+	usleep_range(10000, 11000);
+
+	/* enable rx lfps for usb */
+	if (udphy->mode & UDPHY_MODE_USB)
+		rk_udphy_grfreg_write(udphy->udphygrf, &cfg->grfcfg.rx_lfps, true);
+
+	/* Step 1: power on pma and deassert apb rstn */
+	rk_udphy_grfreg_write(udphy->udphygrf, &cfg->grfcfg.low_pwrn, true);
+
+	rk_udphy_reset_deassert(udphy, "pma_apb");
+	rk_udphy_reset_deassert(udphy, "pcs_apb");
+
+	/* Step 2: set init sequence and phy refclk */
+	ret = regmap_multi_reg_write(udphy->pma_regmap, rk_udphy_init_sequence,
+				     ARRAY_SIZE(rk_udphy_init_sequence));
+	if (ret) {
+		dev_err(udphy->dev, "init sequence set error %d\n", ret);
+		goto assert_resets;
+	}
+
+	ret = rk_udphy_refclk_set(udphy);
+	if (ret) {
+		dev_err(udphy->dev, "refclk set error %d\n", ret);
+		goto assert_resets;
+	}
+
+	/* Step 3: configure lane mux */
+	regmap_update_bits(udphy->pma_regmap, CMN_LANE_MUX_AND_EN_OFFSET,
+			   CMN_DP_LANE_MUX_ALL | CMN_DP_LANE_EN_ALL,
+			   FIELD_PREP(CMN_DP_LANE_MUX_N(3), udphy->lane_mux_sel[3]) |
+			   FIELD_PREP(CMN_DP_LANE_MUX_N(2), udphy->lane_mux_sel[2]) |
+			   FIELD_PREP(CMN_DP_LANE_MUX_N(1), udphy->lane_mux_sel[1]) |
+			   FIELD_PREP(CMN_DP_LANE_MUX_N(0), udphy->lane_mux_sel[0]) |
+			   FIELD_PREP(CMN_DP_LANE_EN_ALL, 0));
+
+	/* Step 4: deassert init rstn and wait for 200ns from datasheet */
+	if (udphy->mode & UDPHY_MODE_USB)
+		rk_udphy_reset_deassert(udphy, "init");
+
+	if (udphy->mode & UDPHY_MODE_DP) {
+		regmap_update_bits(udphy->pma_regmap, CMN_DP_RSTN_OFFSET,
+				   CMN_DP_INIT_RSTN,
+				   FIELD_PREP(CMN_DP_INIT_RSTN, 0x1));
+	}
+
+	udelay(1);
+
+	/*  Step 5: deassert cmn/lane rstn */
+	if (udphy->mode & UDPHY_MODE_USB) {
+		rk_udphy_reset_deassert(udphy, "cmn");
+		rk_udphy_reset_deassert(udphy, "lane");
+	}
+
+	/*  Step 6: wait for lock done of pll */
+	ret = rk_udphy_status_check(udphy);
+	if (ret)
+		goto assert_resets;
+
+	return 0;
+
+assert_resets:
+	rk_udphy_reset_assert_all(udphy);
+	return ret;
+}
+
+static int rk_udphy_setup(struct rk_udphy *udphy)
+{
+	int ret = 0;
+
+	ret = clk_bulk_prepare_enable(udphy->num_clks, udphy->clks);
+	if (ret) {
+		dev_err(udphy->dev, "failed to enable clk\n");
+		return ret;
+	}
+
+	ret = rk_udphy_init(udphy);
+	if (ret) {
+		dev_err(udphy->dev, "failed to init combophy\n");
+		clk_bulk_disable_unprepare(udphy->num_clks, udphy->clks);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void rk_udphy_disable(struct rk_udphy *udphy)
+{
+	clk_bulk_disable_unprepare(udphy->num_clks, udphy->clks);
+	rk_udphy_reset_assert_all(udphy);
+}
+
+static int rk_udphy_parse_lane_mux_data(struct rk_udphy *udphy)
+{
+	int ret, i, num_lanes;
+
+	num_lanes = device_property_count_u32(udphy->dev, "rockchip,dp-lane-mux");
+	if (num_lanes < 0) {
+		dev_dbg(udphy->dev, "no dp-lane-mux, following dp alt mode\n");
+		udphy->mode = UDPHY_MODE_USB;
+		return 0;
+	}
+
+	if (num_lanes != 2 && num_lanes != 4)
+		return dev_err_probe(udphy->dev, -EINVAL,
+				     "invalid number of lane mux\n");
+
+	ret = device_property_read_u32_array(udphy->dev, "rockchip,dp-lane-mux",
+					     udphy->dp_lane_sel, num_lanes);
+	if (ret)
+		return dev_err_probe(udphy->dev, ret, "get dp lane mux failed\n");
+
+	for (i = 0; i < num_lanes; i++) {
+		int j;
+
+		if (udphy->dp_lane_sel[i] > 3)
+			return dev_err_probe(udphy->dev, -EINVAL,
+					     "lane mux between 0 and 3, exceeding the range\n");
+
+		udphy->lane_mux_sel[udphy->dp_lane_sel[i]] = PHY_LANE_MUX_DP;
+
+		for (j = i + 1; j < num_lanes; j++) {
+			if (udphy->dp_lane_sel[i] == udphy->dp_lane_sel[j])
+				return dev_err_probe(udphy->dev, -EINVAL,
+						"set repeat lane mux value\n");
+		}
+	}
+
+	udphy->mode = UDPHY_MODE_DP;
+	if (num_lanes == 2) {
+		udphy->mode |= UDPHY_MODE_USB;
+		udphy->flip = (udphy->lane_mux_sel[0] == PHY_LANE_MUX_DP);
+	}
+
+	return 0;
+}
+
+static int rk_udphy_get_initial_status(struct rk_udphy *udphy)
+{
+	int ret;
+	u32 value;
+
+	ret = clk_bulk_prepare_enable(udphy->num_clks, udphy->clks);
+	if (ret) {
+		dev_err(udphy->dev, "failed to enable clk\n");
+		return ret;
+	}
+
+	rk_udphy_reset_deassert_all(udphy);
+
+	regmap_read(udphy->pma_regmap, CMN_LANE_MUX_AND_EN_OFFSET, &value);
+	if (FIELD_GET(CMN_DP_LANE_MUX_ALL, value) && FIELD_GET(CMN_DP_LANE_EN_ALL, value))
+		udphy->status = UDPHY_MODE_DP;
+	else
+		rk_udphy_disable(udphy);
+
+	return 0;
+}
+
+static int rk_udphy_parse_dt(struct rk_udphy *udphy)
+{
+	struct device *dev = udphy->dev;
+	struct device_node *np = dev_of_node(dev);
+	enum usb_device_speed maximum_speed;
+	int ret;
+
+	udphy->u2phygrf = syscon_regmap_lookup_by_phandle(np, "rockchip,u2phy-grf");
+	if (IS_ERR(udphy->u2phygrf))
+		return dev_err_probe(dev, PTR_ERR(udphy->u2phygrf), "failed to get u2phy-grf\n");
+
+	udphy->udphygrf = syscon_regmap_lookup_by_phandle(np, "rockchip,usbdpphy-grf");
+	if (IS_ERR(udphy->udphygrf))
+		return dev_err_probe(dev, PTR_ERR(udphy->udphygrf), "failed to get usbdpphy-grf\n");
+
+	udphy->usbgrf = syscon_regmap_lookup_by_phandle(np, "rockchip,usb-grf");
+	if (IS_ERR(udphy->usbgrf))
+		return dev_err_probe(dev, PTR_ERR(udphy->usbgrf), "failed to get usb-grf\n");
+
+	udphy->vogrf = syscon_regmap_lookup_by_phandle(np, "rockchip,vo-grf");
+	if (IS_ERR(udphy->vogrf))
+		return dev_err_probe(dev, PTR_ERR(udphy->vogrf), "failed to get vo-grf\n");
+
+	ret = rk_udphy_parse_lane_mux_data(udphy);
+	if (ret)
+		return ret;
+
+	udphy->sbu1_dc_gpio = devm_gpiod_get_optional(dev, "sbu1-dc", GPIOD_OUT_LOW);
+	if (IS_ERR(udphy->sbu1_dc_gpio))
+		return PTR_ERR(udphy->sbu1_dc_gpio);
+
+	udphy->sbu2_dc_gpio = devm_gpiod_get_optional(dev, "sbu2-dc", GPIOD_OUT_LOW);
+	if (IS_ERR(udphy->sbu2_dc_gpio))
+		return PTR_ERR(udphy->sbu2_dc_gpio);
+
+	if (device_property_present(dev, "maximum-speed")) {
+		maximum_speed = usb_get_maximum_speed(dev);
+		udphy->hs = maximum_speed <= USB_SPEED_HIGH ? true : false;
+	}
+
+	ret = rk_udphy_clk_init(udphy, dev);
+	if (ret)
+		return ret;
+
+	ret = rk_udphy_reset_init(udphy, dev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int rk_udphy_power_on(struct rk_udphy *udphy, u8 mode)
+{
+	int ret;
+
+	if (!(udphy->mode & mode)) {
+		dev_info(udphy->dev, "mode 0x%02x is not support\n", mode);
+		return 0;
+	}
+
+	if (udphy->status == UDPHY_MODE_NONE) {
+		udphy->mode_change = false;
+		ret = rk_udphy_setup(udphy);
+		if (ret)
+			return ret;
+
+		if (udphy->mode & UDPHY_MODE_USB)
+			rk_udphy_u3_port_disable(udphy, false);
+	} else if (udphy->mode_change) {
+		udphy->mode_change = false;
+		udphy->status = UDPHY_MODE_NONE;
+		if (udphy->mode == UDPHY_MODE_DP)
+			rk_udphy_u3_port_disable(udphy, true);
+
+		rk_udphy_disable(udphy);
+		ret = rk_udphy_setup(udphy);
+		if (ret)
+			return ret;
+	}
+
+	udphy->status |= mode;
+
+	return 0;
+}
+
+static void rk_udphy_power_off(struct rk_udphy *udphy, u8 mode)
+{
+	if (!(udphy->mode & mode)) {
+		dev_info(udphy->dev, "mode 0x%02x is not support\n", mode);
+		return;
+	}
+
+	if (!udphy->status)
+		return;
+
+	udphy->status &= ~mode;
+
+	if (udphy->status == UDPHY_MODE_NONE)
+		rk_udphy_disable(udphy);
+}
+
+static int rk_udphy_dp_phy_init(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+
+	mutex_lock(&udphy->mutex);
+
+	udphy->dp_in_use = true;
+	rk_udphy_dp_hpd_event_trigger(udphy, udphy->dp_sink_hpd_cfg);
+
+	mutex_unlock(&udphy->mutex);
+
+	return 0;
+}
+
+static int rk_udphy_dp_phy_exit(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+
+	mutex_lock(&udphy->mutex);
+	udphy->dp_in_use = false;
+	mutex_unlock(&udphy->mutex);
+	return 0;
+}
+
+static int rk_udphy_dp_phy_power_on(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+	int ret, dp_lanes;
+
+	mutex_lock(&udphy->mutex);
+
+	dp_lanes = rk_udphy_dplane_get(udphy);
+	phy_set_bus_width(phy, dp_lanes);
+
+	ret = rk_udphy_power_on(udphy, UDPHY_MODE_DP);
+	if (ret)
+		goto unlock;
+
+	rk_udphy_dplane_enable(udphy, dp_lanes);
+
+	rk_udphy_dplane_select(udphy);
+
+unlock:
+	mutex_unlock(&udphy->mutex);
+	/*
+	 * If data send by aux channel too fast after phy power on,
+	 * the aux may be not ready which will cause aux error. Adding
+	 * delay to avoid this issue.
+	 */
+	usleep_range(10000, 11000);
+	return ret;
+}
+
+static int rk_udphy_dp_phy_power_off(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+
+	mutex_lock(&udphy->mutex);
+	rk_udphy_dplane_enable(udphy, 0);
+	rk_udphy_power_off(udphy, UDPHY_MODE_DP);
+	mutex_unlock(&udphy->mutex);
+
+	return 0;
+}
+
+static int rk_udphy_dp_phy_verify_link_rate(unsigned int link_rate)
+{
+	switch (link_rate) {
+	case 1620:
+	case 2700:
+	case 5400:
+	case 8100:
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int rk_udphy_dp_phy_verify_config(struct rk_udphy *udphy,
+					 struct phy_configure_opts_dp *dp)
+{
+	int i, ret;
+
+	/* If changing link rate was required, verify it's supported. */
+	ret = rk_udphy_dp_phy_verify_link_rate(dp->link_rate);
+	if (ret)
+		return ret;
+
+	/* Verify lane count. */
+	switch (dp->lanes) {
+	case 1:
+	case 2:
+	case 4:
+		/* valid lane count. */
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * If changing voltages is required, check swing and pre-emphasis
+	 * levels, per-lane.
+	 */
+	if (dp->set_voltages) {
+		/* Lane count verified previously. */
+		for (i = 0; i < dp->lanes; i++) {
+			if (dp->voltage[i] > 3 || dp->pre[i] > 3)
+				return -EINVAL;
+
+			/*
+			 * Sum of voltage swing and pre-emphasis levels cannot
+			 * exceed 3.
+			 */
+			if (dp->voltage[i] + dp->pre[i] > 3)
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static void rk_udphy_dp_set_voltage(struct rk_udphy *udphy, u8 bw,
+				    u32 voltage, u32 pre, u32 lane)
+{
+	const struct rk_udphy_cfg *cfg = udphy->cfgs;
+	const struct rk_udphy_dp_tx_drv_ctrl (*dp_ctrl)[4];
+	u32 offset = 0x800 * lane;
+	u32 val;
+
+	if (udphy->mux)
+		dp_ctrl = cfg->dp_tx_ctrl_cfg_typec[bw];
+	else
+		dp_ctrl = cfg->dp_tx_ctrl_cfg[bw];
+
+	val = dp_ctrl[voltage][pre].trsv_reg0204;
+	regmap_write(udphy->pma_regmap, 0x0810 + offset, val);
+
+	val = dp_ctrl[voltage][pre].trsv_reg0205;
+	regmap_write(udphy->pma_regmap, 0x0814 + offset, val);
+
+	val = dp_ctrl[voltage][pre].trsv_reg0206;
+	regmap_write(udphy->pma_regmap, 0x0818 + offset, val);
+
+	val = dp_ctrl[voltage][pre].trsv_reg0207;
+	regmap_write(udphy->pma_regmap, 0x081c + offset, val);
+}
+
+static int rk_udphy_dp_phy_configure(struct phy *phy,
+				     union phy_configure_opts *opts)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+	struct phy_configure_opts_dp *dp = &opts->dp;
+	u32 i, val, lane;
+	int ret;
+
+	ret = rk_udphy_dp_phy_verify_config(udphy, dp);
+	if (ret)
+		return ret;
+
+	if (dp->set_rate) {
+		regmap_update_bits(udphy->pma_regmap, CMN_DP_RSTN_OFFSET,
+				   CMN_DP_CMN_RSTN, FIELD_PREP(CMN_DP_CMN_RSTN, 0x0));
+
+		switch (dp->link_rate) {
+		case 1620:
+			udphy->bw = DP_BW_RBR;
+			break;
+
+		case 2700:
+			udphy->bw = DP_BW_HBR;
+			break;
+
+		case 5400:
+			udphy->bw = DP_BW_HBR2;
+			break;
+
+		case 8100:
+			udphy->bw = DP_BW_HBR3;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		regmap_update_bits(udphy->pma_regmap, CMN_DP_LINK_OFFSET, CMN_DP_TX_LINK_BW,
+				   FIELD_PREP(CMN_DP_TX_LINK_BW, udphy->bw));
+		regmap_update_bits(udphy->pma_regmap, CMN_SSC_EN_OFFSET, CMN_ROPLL_SSC_EN,
+				   FIELD_PREP(CMN_ROPLL_SSC_EN, dp->ssc));
+		regmap_update_bits(udphy->pma_regmap, CMN_DP_RSTN_OFFSET, CMN_DP_CMN_RSTN,
+				   FIELD_PREP(CMN_DP_CMN_RSTN, 0x1));
+
+		ret = regmap_read_poll_timeout(udphy->pma_regmap, CMN_ANA_ROPLL_DONE_OFFSET, val,
+					       FIELD_GET(CMN_ANA_ROPLL_LOCK_DONE, val) &&
+					       FIELD_GET(CMN_ANA_ROPLL_AFC_DONE, val),
+					       0, 1000);
+		if (ret) {
+			dev_err(udphy->dev, "ROPLL is not lock, set_rate failed\n");
+			return ret;
+		}
+	}
+
+	if (dp->set_voltages) {
+		for (i = 0; i < dp->lanes; i++) {
+			lane = udphy->dp_lane_sel[i];
+			switch (dp->link_rate) {
+			case 1620:
+			case 2700:
+				regmap_update_bits(udphy->pma_regmap,
+						   TRSV_ANA_TX_CLK_OFFSET_N(lane),
+						   LN_ANA_TX_SER_TXCLK_INV,
+						   FIELD_PREP(LN_ANA_TX_SER_TXCLK_INV,
+						   udphy->lane_mux_sel[lane]));
+				break;
+
+			case 5400:
+			case 8100:
+				regmap_update_bits(udphy->pma_regmap,
+						   TRSV_ANA_TX_CLK_OFFSET_N(lane),
+						   LN_ANA_TX_SER_TXCLK_INV,
+						   FIELD_PREP(LN_ANA_TX_SER_TXCLK_INV, 0x0));
+				break;
+			}
+
+			rk_udphy_dp_set_voltage(udphy, udphy->bw, dp->voltage[i],
+						dp->pre[i], lane);
+		}
+	}
+
+	return 0;
+}
+
+static const struct phy_ops rk_udphy_dp_phy_ops = {
+	.init		= rk_udphy_dp_phy_init,
+	.exit		= rk_udphy_dp_phy_exit,
+	.power_on	= rk_udphy_dp_phy_power_on,
+	.power_off	= rk_udphy_dp_phy_power_off,
+	.configure	= rk_udphy_dp_phy_configure,
+	.owner		= THIS_MODULE,
+};
+
+static int rk_udphy_usb3_phy_init(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+	int ret = 0;
+
+	mutex_lock(&udphy->mutex);
+	/* DP only or high-speed, disable U3 port */
+	if (!(udphy->mode & UDPHY_MODE_USB) || udphy->hs) {
+		rk_udphy_u3_port_disable(udphy, true);
+		goto unlock;
+	}
+
+	ret = rk_udphy_power_on(udphy, UDPHY_MODE_USB);
+
+unlock:
+	mutex_unlock(&udphy->mutex);
+	return ret;
+}
+
+static int rk_udphy_usb3_phy_exit(struct phy *phy)
+{
+	struct rk_udphy *udphy = phy_get_drvdata(phy);
+
+	mutex_lock(&udphy->mutex);
+	/* DP only or high-speed */
+	if (!(udphy->mode & UDPHY_MODE_USB) || udphy->hs)
+		goto unlock;
+
+	rk_udphy_power_off(udphy, UDPHY_MODE_USB);
+
+unlock:
+	mutex_unlock(&udphy->mutex);
+	return 0;
+}
+
+static const struct phy_ops rk_udphy_usb3_phy_ops = {
+	.init		= rk_udphy_usb3_phy_init,
+	.exit		= rk_udphy_usb3_phy_exit,
+	.owner		= THIS_MODULE,
+};
+
+static int rk_udphy_typec_mux_set(struct typec_mux_dev *mux,
+				  struct typec_mux_state *state)
+{
+	struct rk_udphy *udphy = typec_mux_get_drvdata(mux);
+	u8 mode;
+
+	mutex_lock(&udphy->mutex);
+
+	switch (state->mode) {
+	case TYPEC_DP_STATE_C:
+	case TYPEC_DP_STATE_E:
+		udphy->lane_mux_sel[0] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[1] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[2] = PHY_LANE_MUX_DP;
+		udphy->lane_mux_sel[3] = PHY_LANE_MUX_DP;
+		mode = UDPHY_MODE_DP;
+		break;
+
+	case TYPEC_DP_STATE_D:
+	default:
+		if (udphy->flip) {
+			udphy->lane_mux_sel[0] = PHY_LANE_MUX_DP;
+			udphy->lane_mux_sel[1] = PHY_LANE_MUX_DP;
+			udphy->lane_mux_sel[2] = PHY_LANE_MUX_USB;
+			udphy->lane_mux_sel[3] = PHY_LANE_MUX_USB;
+		} else {
+			udphy->lane_mux_sel[0] = PHY_LANE_MUX_USB;
+			udphy->lane_mux_sel[1] = PHY_LANE_MUX_USB;
+			udphy->lane_mux_sel[2] = PHY_LANE_MUX_DP;
+			udphy->lane_mux_sel[3] = PHY_LANE_MUX_DP;
+		}
+		mode = UDPHY_MODE_DP_USB;
+		break;
+	}
+
+	if (state->alt && state->alt->svid == USB_TYPEC_DP_SID) {
+		struct typec_displayport_data *data = state->data;
+
+		if (!data) {
+			rk_udphy_dp_hpd_event_trigger(udphy, false);
+		} else if (data->status & DP_STATUS_IRQ_HPD) {
+			rk_udphy_dp_hpd_event_trigger(udphy, false);
+			usleep_range(750, 800);
+			rk_udphy_dp_hpd_event_trigger(udphy, true);
+		} else if (data->status & DP_STATUS_HPD_STATE) {
+			if (udphy->mode != mode) {
+				udphy->mode = mode;
+				udphy->mode_change = true;
+			}
+			rk_udphy_dp_hpd_event_trigger(udphy, true);
+		} else {
+			rk_udphy_dp_hpd_event_trigger(udphy, false);
+		}
+	}
+
+	mutex_unlock(&udphy->mutex);
+	return 0;
+}
+
+static void rk_udphy_typec_mux_unregister(void *data)
+{
+	struct rk_udphy *udphy = data;
+
+	typec_mux_unregister(udphy->mux);
+}
+
+static int rk_udphy_setup_typec_mux(struct rk_udphy *udphy)
+{
+	struct typec_mux_desc mux_desc = {};
+
+	mux_desc.drvdata = udphy;
+	mux_desc.fwnode = dev_fwnode(udphy->dev);
+	mux_desc.set = rk_udphy_typec_mux_set;
+
+	udphy->mux = typec_mux_register(udphy->dev, &mux_desc);
+	if (IS_ERR(udphy->mux)) {
+		dev_err(udphy->dev, "Error register typec mux: %ld\n",
+			PTR_ERR(udphy->mux));
+		return PTR_ERR(udphy->mux);
+	}
+
+	return devm_add_action_or_reset(udphy->dev, rk_udphy_typec_mux_unregister,
+					udphy);
+}
+
+static const struct regmap_config rk_udphy_pma_regmap_cfg = {
+	.reg_bits = 32,
+	.reg_stride = 4,
+	.val_bits = 32,
+	.fast_io = true,
+	.max_register = 0x20dc,
+};
+
+static struct phy *rk_udphy_phy_xlate(struct device *dev, const struct of_phandle_args *args)
+{
+	struct rk_udphy *udphy = dev_get_drvdata(dev);
+
+	if (args->args_count == 0)
+		return ERR_PTR(-EINVAL);
+
+	switch (args->args[0]) {
+	case PHY_TYPE_USB3:
+		return udphy->phy_u3;
+	case PHY_TYPE_DP:
+		return udphy->phy_dp;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static int rk_udphy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	struct rk_udphy *udphy;
+	void __iomem *base;
+	int id, ret;
+
+	udphy = devm_kzalloc(dev, sizeof(*udphy), GFP_KERNEL);
+	if (!udphy)
+		return -ENOMEM;
+
+	udphy->cfgs = device_get_match_data(dev);
+	if (!udphy->cfgs)
+		return dev_err_probe(dev, -EINVAL, "missing match data\n");
+
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/* find the phy-id from the io address */
+	udphy->id = -ENODEV;
+	for (id = 0; id < udphy->cfgs->num_phys; id++) {
+		if (res->start == udphy->cfgs->phy_ids[id]) {
+			udphy->id = id;
+			break;
+		}
+	}
+
+	if (udphy->id < 0)
+		return dev_err_probe(dev, -ENODEV, "no matching device found\n");
+
+	udphy->pma_regmap = devm_regmap_init_mmio(dev, base + UDPHY_PMA,
+						  &rk_udphy_pma_regmap_cfg);
+	if (IS_ERR(udphy->pma_regmap))
+		return PTR_ERR(udphy->pma_regmap);
+
+	udphy->dev = dev;
+	ret = rk_udphy_parse_dt(udphy);
+	if (ret)
+		return ret;
+
+	ret = rk_udphy_get_initial_status(udphy);
+	if (ret)
+		return ret;
+
+	mutex_init(&udphy->mutex);
+	platform_set_drvdata(pdev, udphy);
+
+	if (device_property_present(dev, "orientation-switch")) {
+		ret = rk_udphy_setup_orien_switch(udphy);
+		if (ret)
+			return ret;
+	}
+
+	if (device_property_present(dev, "mode-switch")) {
+		ret = rk_udphy_setup_typec_mux(udphy);
+		if (ret)
+			return ret;
+	}
+
+	udphy->phy_u3 = devm_phy_create(dev, dev->of_node, &rk_udphy_usb3_phy_ops);
+	if (IS_ERR(udphy->phy_u3)) {
+		ret = PTR_ERR(udphy->phy_u3);
+		return dev_err_probe(dev, ret, "failed to create USB3 phy\n");
+	}
+	phy_set_drvdata(udphy->phy_u3, udphy);
+
+	udphy->phy_dp = devm_phy_create(dev, dev->of_node, &rk_udphy_dp_phy_ops);
+	if (IS_ERR(udphy->phy_dp)) {
+		ret = PTR_ERR(udphy->phy_dp);
+		return dev_err_probe(dev, ret, "failed to create DP phy\n");
+	}
+	phy_set_bus_width(udphy->phy_dp, rk_udphy_dplane_get(udphy));
+	udphy->phy_dp->attrs.max_link_rate = 8100;
+	phy_set_drvdata(udphy->phy_dp, udphy);
+
+	phy_provider = devm_of_phy_provider_register(dev, rk_udphy_phy_xlate);
+	if (IS_ERR(phy_provider)) {
+		ret = PTR_ERR(phy_provider);
+		return dev_err_probe(dev, ret, "failed to register phy provider\n");
+	}
+
+	return 0;
+}
+
+static int __maybe_unused rk_udphy_resume(struct device *dev)
+{
+	struct rk_udphy *udphy = dev_get_drvdata(dev);
+
+	if (udphy->dp_sink_hpd_sel)
+		rk_udphy_dp_hpd_event_trigger(udphy, udphy->dp_sink_hpd_cfg);
+
+	return 0;
+}
+
+static const struct dev_pm_ops rk_udphy_pm_ops = {
+	SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, rk_udphy_resume)
+};
+
+static const char * const rk_udphy_rst_list[] = {
+	"init", "cmn", "lane", "pcs_apb", "pma_apb"
+};
+
+static const struct rk_udphy_cfg rk3588_udphy_cfgs = {
+	.num_phys = 2,
+	.phy_ids = {
+		0xfed80000,
+		0xfed90000,
+	},
+	.num_rsts = ARRAY_SIZE(rk_udphy_rst_list),
+	.rst_list = rk_udphy_rst_list,
+	.grfcfg	= {
+		/* u2phy-grf */
+		.bvalid_phy_con		= RK_UDPHY_GEN_GRF_REG(0x0008, 1, 0, 0x2, 0x3),
+		.bvalid_grf_con		= RK_UDPHY_GEN_GRF_REG(0x0010, 3, 2, 0x2, 0x3),
+
+		/* usb-grf */
+		.usb3otg0_cfg		= RK_UDPHY_GEN_GRF_REG(0x001c, 15, 0, 0x1100, 0x0188),
+		.usb3otg1_cfg		= RK_UDPHY_GEN_GRF_REG(0x0034, 15, 0, 0x1100, 0x0188),
+
+		/* usbdpphy-grf */
+		.low_pwrn		= RK_UDPHY_GEN_GRF_REG(0x0004, 13, 13, 0, 1),
+		.rx_lfps		= RK_UDPHY_GEN_GRF_REG(0x0004, 14, 14, 0, 1),
+	},
+	.vogrfcfg = {
+		{
+			.hpd_trigger	= RK_UDPHY_GEN_GRF_REG(0x0000, 11, 10, 1, 3),
+			.dp_lane_reg	= 0x0000,
+		},
+		{
+			.hpd_trigger	= RK_UDPHY_GEN_GRF_REG(0x0008, 11, 10, 1, 3),
+			.dp_lane_reg	= 0x0008,
+		},
+	},
+	.dp_tx_ctrl_cfg = {
+		rk3588_dp_tx_drv_ctrl_rbr_hbr,
+		rk3588_dp_tx_drv_ctrl_rbr_hbr,
+		rk3588_dp_tx_drv_ctrl_hbr2,
+		rk3588_dp_tx_drv_ctrl_hbr3,
+	},
+	.dp_tx_ctrl_cfg_typec = {
+		rk3588_dp_tx_drv_ctrl_rbr_hbr_typec,
+		rk3588_dp_tx_drv_ctrl_rbr_hbr_typec,
+		rk3588_dp_tx_drv_ctrl_hbr2,
+		rk3588_dp_tx_drv_ctrl_hbr3,
+	},
+};
+
+static const struct of_device_id rk_udphy_dt_match[] = {
+	{
+		.compatible = "rockchip,rk3588-usbdp-phy",
+		.data = &rk3588_udphy_cfgs
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rk_udphy_dt_match);
+
+static struct platform_driver rk_udphy_driver = {
+	.probe		= rk_udphy_probe,
+	.driver		= {
+		.name	= "rockchip-usbdp-phy",
+		.of_match_table = rk_udphy_dt_match,
+		.pm = &rk_udphy_pm_ops,
+	},
+};
+module_platform_driver(rk_udphy_driver);
+
+MODULE_AUTHOR("Frank Wang <frank.wang@rock-chips.com>");
+MODULE_AUTHOR("Zhang Yubing <yubing.zhang@rock-chips.com>");
+MODULE_DESCRIPTION("Rockchip USBDP Combo PHY driver");
+MODULE_LICENSE("GPL");
--
2.42.0


From 8796bccdf3d52a8fe8eaf2fab36896c315b98104 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Fri, 5 Jan 2024 18:38:43 +0100
Subject: [PATCH 25/69] arm64: defconfig: enable Rockchip Samsung USBDP PHY

The USBDP Phy is used by RK3588 to handle the Dual-Role USB3
controllers. The Phy also supports Displayport Alt-Mode, but
the necessary DRM driver has not yet been merged.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 65e33174f813..107c1553d24d 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1518,6 +1518,7 @@ CONFIG_PHY_ROCKCHIP_PCIE=m
 CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX=m
 CONFIG_PHY_ROCKCHIP_SNPS_PCIE3=y
 CONFIG_PHY_ROCKCHIP_TYPEC=y
+CONFIG_PHY_ROCKCHIP_USBDP=m
 CONFIG_PHY_SAMSUNG_UFS=y
 CONFIG_PHY_UNIPHIER_USB2=y
 CONFIG_PHY_UNIPHIER_USB3=y
--
2.42.0


From 7d6e45f970466dfdfa210a8940179f9d42d67aa0 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 13 Feb 2024 15:12:27 +0100
Subject: [PATCH 26/69] arm64: dts: rockchip: reorder usb2phy properties for
 rk3588

Reorder common DT properties alphabetically for usb2phy, according
to latest DT style rules.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 89d40cff635f..df7da3a67f45 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -602,13 +602,13 @@ usb2phy2_grf: syscon@fd5d8000 {
 		u2phy2: usb2-phy@8000 {
 			compatible = "rockchip,rk3588-usb2phy";
 			reg = <0x8000 0x10>;
-			interrupts = <GIC_SPI 391 IRQ_TYPE_LEVEL_HIGH 0>;
-			resets = <&cru SRST_OTGPHY_U2_0>, <&cru SRST_P_USB2PHY_U2_0_GRF0>;
-			reset-names = "phy", "apb";
+			#clock-cells = <0>;
 			clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>;
 			clock-names = "phyclk";
 			clock-output-names = "usb480m_phy2";
-			#clock-cells = <0>;
+			interrupts = <GIC_SPI 391 IRQ_TYPE_LEVEL_HIGH 0>;
+			resets = <&cru SRST_OTGPHY_U2_0>, <&cru SRST_P_USB2PHY_U2_0_GRF0>;
+			reset-names = "phy", "apb";
 			status = "disabled";

 			u2phy2_host: host-port {
@@ -627,13 +627,13 @@ usb2phy3_grf: syscon@fd5dc000 {
 		u2phy3: usb2-phy@c000 {
 			compatible = "rockchip,rk3588-usb2phy";
 			reg = <0xc000 0x10>;
-			interrupts = <GIC_SPI 392 IRQ_TYPE_LEVEL_HIGH 0>;
-			resets = <&cru SRST_OTGPHY_U2_1>, <&cru SRST_P_USB2PHY_U2_1_GRF0>;
-			reset-names = "phy", "apb";
+			#clock-cells = <0>;
 			clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>;
 			clock-names = "phyclk";
 			clock-output-names = "usb480m_phy3";
-			#clock-cells = <0>;
+			interrupts = <GIC_SPI 392 IRQ_TYPE_LEVEL_HIGH 0>;
+			resets = <&cru SRST_OTGPHY_U2_1>, <&cru SRST_P_USB2PHY_U2_1_GRF0>;
+			reset-names = "phy", "apb";
 			status = "disabled";

 			u2phy3_host: host-port {
--
2.42.0


From 796034b05f316a958f5275fe307284dc0bc7a02f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 12 Feb 2024 19:08:27 +0100
Subject: [PATCH 27/69] arm64: dts: rockchip: fix usb2phy nodename for rk3588

usb2-phy should be named usb2phy according to the DT binding,
so let's fix it up accordingly.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index df7da3a67f45..04069f80f06d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -599,7 +599,7 @@ usb2phy2_grf: syscon@fd5d8000 {
 		#address-cells = <1>;
 		#size-cells = <1>;

-		u2phy2: usb2-phy@8000 {
+		u2phy2: usb2phy@8000 {
 			compatible = "rockchip,rk3588-usb2phy";
 			reg = <0x8000 0x10>;
 			#clock-cells = <0>;
@@ -624,7 +624,7 @@ usb2phy3_grf: syscon@fd5dc000 {
 		#address-cells = <1>;
 		#size-cells = <1>;

-		u2phy3: usb2-phy@c000 {
+		u2phy3: usb2phy@c000 {
 			compatible = "rockchip,rk3588-usb2phy";
 			reg = <0xc000 0x10>;
 			#clock-cells = <0>;
--
2.42.0


From 584c645a1824b7928114c5b10edebac13756002c Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Apr 2023 17:49:04 +0200
Subject: [PATCH 28/69] arm64: dts: rockchip: add USBDP phys on rk3588

Add both USB3-DisplayPort PHYs to RK3588 SoC DT.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588.dtsi  | 52 +++++++++++++++++++
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 63 +++++++++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
index 5519c1430cb7..4fdd047c9eb9 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
@@ -17,6 +17,36 @@ pipe_phy1_grf: syscon@fd5c0000 {
 		reg = <0x0 0xfd5c0000 0x0 0x100>;
 	};

+	usbdpphy1_grf: syscon@fd5cc000 {
+		compatible = "rockchip,rk3588-usbdpphy-grf", "syscon";
+		reg = <0x0 0xfd5cc000 0x0 0x4000>;
+	};
+
+	usb2phy1_grf: syscon@fd5d4000 {
+		compatible = "rockchip,rk3588-usb2phy-grf", "syscon", "simple-mfd";
+		reg = <0x0 0xfd5d4000 0x0 0x4000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		u2phy1: usb2phy@4000 {
+			compatible = "rockchip,rk3588-usb2phy";
+			reg = <0x4000 0x10>;
+			#clock-cells = <0>;
+			clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>;
+			clock-names = "phyclk";
+			clock-output-names = "usb480m_phy1";
+			interrupts = <GIC_SPI 394 IRQ_TYPE_LEVEL_HIGH 0>;
+			resets = <&cru SRST_OTGPHY_U3_1>, <&cru SRST_P_USB2PHY_U3_1_GRF0>;
+			reset-names = "phy", "apb";
+			status = "disabled";
+
+			u2phy1_otg: otg-port {
+				#phy-cells = <0>;
+				status = "disabled";
+			};
+		};
+	};
+
 	i2s8_8ch: i2s@fddc8000 {
 		compatible = "rockchip,rk3588-i2s-tdm";
 		reg = <0x0 0xfddc8000 0x0 0x1000>;
@@ -310,6 +340,28 @@ sata-port@0 {
 		};
 	};

+	usbdp_phy1: phy@fed90000 {
+		compatible = "rockchip,rk3588-usbdp-phy";
+		reg = <0x0 0xfed90000 0x0 0x10000>;
+		#phy-cells = <1>;
+		clocks = <&cru CLK_USBDPPHY_MIPIDCPPHY_REF>,
+			 <&cru CLK_USBDP_PHY1_IMMORTAL>,
+			 <&cru PCLK_USBDPPHY1>,
+			 <&u2phy1>;
+		clock-names = "refclk", "immortal", "pclk", "utmi";
+		resets = <&cru SRST_USBDP_COMBO_PHY1_INIT>,
+			 <&cru SRST_USBDP_COMBO_PHY1_CMN>,
+			 <&cru SRST_USBDP_COMBO_PHY1_LANE>,
+			 <&cru SRST_USBDP_COMBO_PHY1_PCS>,
+			 <&cru SRST_P_USBDPPHY1>;
+		reset-names = "init", "cmn", "lane", "pcs_apb", "pma_apb";
+		rockchip,u2phy-grf = <&usb2phy1_grf>;
+		rockchip,usb-grf = <&usb_grf>;
+		rockchip,usbdpphy-grf = <&usbdpphy1_grf>;
+		rockchip,vo-grf = <&vo0_grf>;
+		status = "disabled";
+	};
+
 	combphy1_ps: phy@fee10000 {
 		compatible = "rockchip,rk3588-naneng-combphy";
 		reg = <0x0 0xfee10000 0x0 0x100>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 04069f80f06d..44ccf38799fe 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -572,12 +572,23 @@ vop_grf: syscon@fd5a4000 {
 		reg = <0x0 0xfd5a4000 0x0 0x2000>;
 	};

+	vo0_grf: syscon@fd5a6000 {
+		compatible = "rockchip,rk3588-vo-grf", "syscon";
+		reg = <0x0 0xfd5a6000 0x0 0x2000>;
+		clocks = <&cru PCLK_VO0GRF>;
+	};
+
 	vo1_grf: syscon@fd5a8000 {
 		compatible = "rockchip,rk3588-vo-grf", "syscon";
 		reg = <0x0 0xfd5a8000 0x0 0x100>;
 		clocks = <&cru PCLK_VO1GRF>;
 	};

+	usb_grf: syscon@fd5ac000 {
+		compatible = "rockchip,rk3588-usb-grf", "syscon";
+		reg = <0x0 0xfd5ac000 0x0 0x4000>;
+	};
+
 	php_grf: syscon@fd5b0000 {
 		compatible = "rockchip,rk3588-php-grf", "syscon";
 		reg = <0x0 0xfd5b0000 0x0 0x1000>;
@@ -593,6 +604,36 @@ pipe_phy2_grf: syscon@fd5c4000 {
 		reg = <0x0 0xfd5c4000 0x0 0x100>;
 	};

+	usbdpphy0_grf: syscon@fd5c8000 {
+		compatible = "rockchip,rk3588-usbdpphy-grf", "syscon";
+		reg = <0x0 0xfd5c8000 0x0 0x4000>;
+	};
+
+	usb2phy0_grf: syscon@fd5d0000 {
+		compatible = "rockchip,rk3588-usb2phy-grf", "syscon", "simple-mfd";
+		reg = <0x0 0xfd5d0000 0x0 0x4000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		u2phy0: usb2phy@0 {
+			compatible = "rockchip,rk3588-usb2phy";
+			reg = <0x0 0x10>;
+			#clock-cells = <0>;
+			clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>;
+			clock-names = "phyclk";
+			clock-output-names = "usb480m_phy0";
+			interrupts = <GIC_SPI 393 IRQ_TYPE_LEVEL_HIGH 0>;
+			resets = <&cru SRST_OTGPHY_U3_0>, <&cru SRST_P_USB2PHY_U3_0_GRF0>;
+			reset-names = "phy", "apb";
+			status = "disabled";
+
+			u2phy0_otg: otg-port {
+				#phy-cells = <0>;
+				status = "disabled";
+			};
+		};
+	};
+
 	usb2phy2_grf: syscon@fd5d8000 {
 		compatible = "rockchip,rk3588-usb2phy-grf", "syscon", "simple-mfd";
 		reg = <0x0 0xfd5d8000 0x0 0x4000>;
@@ -2436,6 +2477,28 @@ hdptxphy_hdmi0: phy@fed60000 {
 		status = "disabled";
 	};

+	usbdp_phy0: phy@fed80000 {
+		compatible = "rockchip,rk3588-usbdp-phy";
+		reg = <0x0 0xfed80000 0x0 0x10000>;
+		#phy-cells = <1>;
+		clocks = <&cru CLK_USBDPPHY_MIPIDCPPHY_REF>,
+			 <&cru CLK_USBDP_PHY0_IMMORTAL>,
+			 <&cru PCLK_USBDPPHY0>,
+			 <&u2phy0>;
+		clock-names = "refclk", "immortal", "pclk", "utmi";
+		resets = <&cru SRST_USBDP_COMBO_PHY0_INIT>,
+			 <&cru SRST_USBDP_COMBO_PHY0_CMN>,
+			 <&cru SRST_USBDP_COMBO_PHY0_LANE>,
+			 <&cru SRST_USBDP_COMBO_PHY0_PCS>,
+			 <&cru SRST_P_USBDPPHY0>;
+		reset-names = "init", "cmn", "lane", "pcs_apb", "pma_apb";
+		rockchip,u2phy-grf = <&usb2phy0_grf>;
+		rockchip,usb-grf = <&usb_grf>;
+		rockchip,usbdpphy-grf = <&usbdpphy0_grf>;
+		rockchip,vo-grf = <&vo0_grf>;
+		status = "disabled";
+	};
+
 	combphy0_ps: phy@fee00000 {
 		compatible = "rockchip,rk3588-naneng-combphy";
 		reg = <0x0 0xfee00000 0x0 0x100>;
--
2.42.0


From 37bee0d690d311ed23ad6ab83c8d5714925f57e6 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 18 Jul 2023 19:05:38 +0200
Subject: [PATCH 29/69] arm64: dts: rockchip: add USB3 DRD controllers on
 rk3588

Add both USB3 dual-role controllers to the RK3588 devicetree.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588.dtsi  | 20 ++++++++++++++++++++
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 22 ++++++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
index 4fdd047c9eb9..5984016b5f96 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
@@ -7,6 +7,26 @@
 #include "rk3588-pinctrl.dtsi"

 / {
+	usb_host1_xhci: usb@fc400000 {
+		compatible = "rockchip,rk3588-dwc3", "snps,dwc3";
+		reg = <0x0 0xfc400000 0x0 0x400000>;
+		interrupts = <GIC_SPI 221 IRQ_TYPE_LEVEL_HIGH 0>;
+		clocks = <&cru REF_CLK_USB3OTG1>, <&cru SUSPEND_CLK_USB3OTG1>,
+			 <&cru ACLK_USB3OTG1>;
+		clock-names = "ref_clk", "suspend_clk", "bus_clk";
+		dr_mode = "otg";
+		phys = <&u2phy1_otg>, <&usbdp_phy1 PHY_TYPE_USB3>;
+		phy-names = "usb2-phy", "usb3-phy";
+		phy_type = "utmi_wide";
+		power-domains = <&power RK3588_PD_USB>;
+		resets = <&cru SRST_A_USB3OTG1>;
+		snps,dis_enblslpm_quirk;
+		snps,dis-u2-freeclk-exists-quirk;
+		snps,dis-del-phy-power-chg-quirk;
+		snps,dis-tx-ipgap-linecheck-quirk;
+		status = "disabled";
+	};
+
 	pcie30_phy_grf: syscon@fd5b8000 {
 		compatible = "rockchip,rk3588-pcie3-phy-grf", "syscon";
 		reg = <0x0 0xfd5b8000 0x0 0x10000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 44ccf38799fe..4f2d7b3738ad 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -436,6 +436,28 @@ scmi_shmem: sram@0 {
 		};
 	};

+	usb_host0_xhci: usb@fc000000 {
+		compatible = "rockchip,rk3588-dwc3", "snps,dwc3";
+		reg = <0x0 0xfc000000 0x0 0x400000>;
+		interrupts = <GIC_SPI 220 IRQ_TYPE_LEVEL_HIGH 0>;
+		clocks = <&cru REF_CLK_USB3OTG0>, <&cru SUSPEND_CLK_USB3OTG0>,
+			 <&cru ACLK_USB3OTG0>;
+		clock-names = "ref_clk", "suspend_clk", "bus_clk";
+		dr_mode = "otg";
+		phys = <&u2phy0_otg>, <&usbdp_phy0 PHY_TYPE_USB3>;
+		phy-names = "usb2-phy", "usb3-phy";
+		phy_type = "utmi_wide";
+		power-domains = <&power RK3588_PD_USB>;
+		resets = <&cru SRST_A_USB3OTG0>;
+		snps,dis_enblslpm_quirk;
+		snps,dis-u1-entry-quirk;
+		snps,dis-u2-entry-quirk;
+		snps,dis-u2-freeclk-exists-quirk;
+		snps,dis-del-phy-power-chg-quirk;
+		snps,dis-tx-ipgap-linecheck-quirk;
+		status = "disabled";
+	};
+
 	usb_host0_ehci: usb@fc800000 {
 		compatible = "rockchip,rk3588-ehci", "generic-ehci";
 		reg = <0x0 0xfc800000 0x0 0x40000>;
--
2.42.0


From a5e5df48a70cfe5bfa17e06306b3bd30e30ae2ae Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Wed, 26 Apr 2023 21:18:43 +0200
Subject: [PATCH 30/69] arm64: dts: rockchip: add USB3 to rk3588-evb1

Add support for the board's USB3 connectors. It has 1x USB Type-A
and 1x USB Type-C.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../boot/dts/rockchip/rk3588-evb1-v10.dts     | 143 ++++++++++++++++++
 1 file changed, 143 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index 56c019b25fa8..cb7990235f13 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -9,6 +9,7 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
 #include <dt-bindings/pinctrl/rockchip.h>
+#include <dt-bindings/usb/pd.h>
 #include "rk3588.dtsi"

 / {
@@ -224,6 +225,18 @@ vcc5v0_usb: vcc5v0-usb-regulator {
 		regulator-max-microvolt = <5000000>;
 		vin-supply = <&vcc5v0_usbdcin>;
 	};
+
+	vbus5v0_typec: vbus5v0-typec {
+		compatible = "regulator-fixed";
+		regulator-name = "vbus5v0_typec";
+		regulator-min-microvolt = <5000000>;
+		regulator-max-microvolt = <5000000>;
+		enable-active-high;
+		gpio = <&gpio4 RK_PD0 GPIO_ACTIVE_HIGH>;
+		vin-supply = <&vcc5v0_usb>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&typec5v_pwren>;
+	};
 };

 &combphy0_ps {
@@ -290,6 +303,56 @@ &gpu {
 &i2c2 {
 	status = "okay";

+	usbc0: usb-typec@22 {
+		compatible = "fcs,fusb302";
+		reg = <0x22>;
+		interrupt-parent = <&gpio3>;
+		interrupts = <RK_PB4 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&usbc0_int>;
+		vbus-supply = <&vbus5v0_typec>;
+		status = "okay";
+
+		usb_con: connector {
+			compatible = "usb-c-connector";
+			label = "USB-C";
+			data-role = "dual";
+			power-role = "dual";
+			try-power-role = "source";
+			op-sink-microwatt = <1000000>;
+			sink-pdos =
+				<PDO_FIXED(5000, 1000, PDO_FIXED_USB_COMM)>;
+			source-pdos =
+				<PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					usbc0_orien_sw: endpoint {
+						remote-endpoint = <&usbdp_phy0_orientation_switch>;
+					};
+				};
+
+				port@1 {
+					reg = <1>;
+					usbc0_role_sw: endpoint {
+						remote-endpoint = <&dwc3_0_role_switch>;
+					};
+				};
+
+				port@2 {
+					reg = <2>;
+					dp_altmode_mux: endpoint {
+						remote-endpoint = <&usbdp_phy0_dp_altmode_mux>;
+					};
+				};
+			};
+		};
+	};
+
 	hym8563: rtc@51 {
 		compatible = "haoyu,hym8563";
 		reg = <0x51>;
@@ -416,6 +479,16 @@ vcc5v0_host_en: vcc5v0-host-en {
 			rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
 	};
+
+	usb-typec {
+		usbc0_int: usbc0-int {
+			rockchip,pins = <3 RK_PB4 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+
+		typec5v_pwren: typec5v-pwren {
+			rockchip,pins = <4 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
 };

 &pwm2 {
@@ -1055,6 +1128,22 @@ &sata0 {
 	status = "okay";
 };

+&u2phy0 {
+	status = "okay";
+};
+
+&u2phy0_otg {
+	status = "okay";
+};
+
+&u2phy1 {
+	status = "okay";
+};
+
+&u2phy1_otg {
+	status = "okay";
+};
+
 &u2phy2 {
 	status = "okay";
 };
@@ -1093,3 +1182,57 @@ &usb_host1_ehci {
 &usb_host1_ohci {
 	status = "okay";
 };
+
+&usbdp_phy0 {
+	orientation-switch;
+	mode-switch;
+	sbu1-dc-gpios = <&gpio4 RK_PA6 GPIO_ACTIVE_HIGH>;
+	sbu2-dc-gpios = <&gpio4 RK_PA7 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+
+	port {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		usbdp_phy0_orientation_switch: endpoint@0 {
+			reg = <0>;
+			remote-endpoint = <&usbc0_orien_sw>;
+		};
+
+		usbdp_phy0_dp_altmode_mux: endpoint@1 {
+			reg = <1>;
+			remote-endpoint = <&dp_altmode_mux>;
+		};
+	};
+};
+
+&usbdp_phy1 {
+	/*
+	 * USBDP PHY1 is wired to a female USB3 Type-A connector. Additionally
+	 * the differential pairs 2+3 and the aux channel are wired to a RTD2166,
+	 * which converts the DP signal into VGA. This is exposed on the
+	 * board via a female VGA connector.
+	 */
+	rockchip,dp-lane-mux = <2 3>;
+	status = "okay";
+};
+
+&usb_host0_xhci {
+	dr_mode = "otg";
+	usb-role-switch;
+	status = "okay";
+
+	port {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		dwc3_0_role_switch: endpoint@0 {
+			reg = <0>;
+			remote-endpoint = <&usbc0_role_sw>;
+		};
+	};
+};
+
+&usb_host1_xhci {
+	dr_mode = "host";
+	status = "okay";
+};
--
2.42.0


From 39bf1fd9d664f4537086610152c88f55cdb69c7e Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Jul 2023 16:30:46 +0200
Subject: [PATCH 31/69] arm64: dts: rockchip: add upper USB3 port to rock-5a

Enable full support (XHCI, EHCI, OHCI) for the upper USB3 port from
Radxa Rock 5 Model A. The lower one is already supported.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../boot/dts/rockchip/rk3588s-rock-5a.dts      | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
index 00afb90d4eb1..b8ca3ee4c89a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts
@@ -697,6 +697,14 @@ regulator-state-mem {
 	};
 };

+&u2phy0 {
+	status = "okay";
+};
+
+&u2phy0_otg {
+	status = "okay";
+};
+
 &u2phy2 {
 	status = "okay";
 };
@@ -720,6 +728,11 @@ &uart2 {
 	status = "okay";
 };

+&usbdp_phy0 {
+	status = "okay";
+	rockchip,dp-lane-mux = <2 3>;
+};
+
 &usb_host0_ehci {
 	status = "okay";
 	pinctrl-names = "default";
@@ -730,6 +743,11 @@ &usb_host0_ohci {
 	status = "okay";
 };

+&usb_host0_xhci {
+	dr_mode = "host";
+	status = "okay";
+};
+
 &usb_host1_ehci {
 	status = "okay";
 };
--
2.42.0


From b2a005745e0ea03524378567c58d4645e9e5b5e6 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Jul 2023 17:18:17 +0200
Subject: [PATCH 32/69] arm64: dts: rockchip: add lower USB3 port to rock-5b

Enable full support (XHCI, EHCI, OHCI) for the lower USB3 port from
Radxa Rock 5 Model B. The upper one is already supported.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index d6bf2ee07e87..b67b6c50d0dc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -747,6 +747,14 @@ &uart2 {
 	status = "okay";
 };

+&u2phy1 {
+	status = "okay";
+};
+
+&u2phy1_otg {
+	status = "okay";
+};
+
 &u2phy2 {
 	status = "okay";
 };
@@ -766,6 +774,10 @@ &u2phy3_host {
 	status = "okay";
 };

+&usbdp_phy1 {
+	status = "okay";
+};
+
 &usb_host0_ehci {
 	status = "okay";
 };
@@ -782,6 +794,11 @@ &usb_host1_ohci {
 	status = "okay";
 };

+&usb_host1_xhci {
+	dr_mode = "host";
+	status = "okay";
+};
+
 &usb_host2_xhci {
 	status = "okay";
 };
--
2.42.0


From 85302d0f54e5c0fe6351c7ce77b4d387888b37e3 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 25 Jul 2023 18:35:56 +0200
Subject: [PATCH 33/69] [BROKEN] arm64: dts: rockchip: rk3588-rock5b: add USB-C
 support

Add support for using the Radxa Rock 5 Model B USB-C port for USB in
OHCI, EHCI or XHCI mode. Displayport AltMode is not yet supported.

Note: Enabling support for the USB-C port results in a board reset
when the system is supplied with a PD capable power-supply. Until
this has been analyzed and fixed, let's disable support for the
Type-C port.

For details check this Gitlab issue:
https://gitlab.collabora.com/hardware-enablement/rockchip-3588/linux/-/issues/8

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../boot/dts/rockchip/rk3588-rock-5b.dts      | 115 ++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index b67b6c50d0dc..6fe97c12fc84 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -4,6 +4,7 @@

 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/leds/common.h>
+#include <dt-bindings/usb/pd.h>
 #include "rk3588.dtsi"

 / {
@@ -65,6 +66,15 @@ rfkill {
 		shutdown-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
 	};

+	vcc12v_dcin: vcc12v-dcin-regulator {
+		compatible = "regulator-fixed";
+		regulator-name = "vcc12v_dcin";
+		regulator-always-on;
+		regulator-boot-on;
+		regulator-min-microvolt = <12000000>;
+		regulator-max-microvolt = <12000000>;
+	};
+
 	vcc3v3_pcie2x1l0: vcc3v3-pcie2x1l0-regulator {
 		compatible = "regulator-fixed";
 		enable-active-high;
@@ -123,6 +133,7 @@ vcc5v0_sys: vcc5v0-sys-regulator {
 		regulator-boot-on;
 		regulator-min-microvolt = <5000000>;
 		regulator-max-microvolt = <5000000>;
+		vin-supply = <&vcc12v_dcin>;
 	};

 	vcc_1v1_nldo_s3: vcc-1v1-nldo-s3-regulator {
@@ -225,6 +236,61 @@ regulator-state-mem {
 	};
 };

+&i2c4 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&i2c4m1_xfer>;
+	status = "okay";
+
+	usbc0: usb-typec@22 {
+		compatible = "fcs,fusb302";
+		reg = <0x22>;
+		interrupt-parent = <&gpio3>;
+		interrupts = <RK_PB4 IRQ_TYPE_LEVEL_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&usbc0_int>;
+		vbus-supply = <&vcc12v_dcin>;
+		status = "disabled";
+
+		usb_con: connector {
+			compatible = "usb-c-connector";
+			label = "USB-C";
+			data-role = "dual";
+			power-role = "sink";
+			try-power-role = "sink";
+			op-sink-microwatt = <1000000>;
+			sink-pdos =
+				<PDO_FIXED(5000, 3000, PDO_FIXED_USB_COMM)>,
+				<PDO_VAR(5000, 20000, 5000)>;
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					usbc0_hs: endpoint {
+						remote-endpoint = <&usb_host0_xhci_drd_sw>;
+					};
+				};
+
+				port@1 {
+					reg = <1>;
+					usbc0_ss: endpoint {
+						remote-endpoint = <&usbdp_phy0_typec_ss>;
+					};
+				};
+
+				port@2 {
+					reg = <2>;
+					usbc0_sbu: endpoint {
+						remote-endpoint = <&usbdp_phy0_typec_sbu>;
+					};
+				};
+			};
+		};
+	};
+};
+
 &i2c6 {
 	status = "okay";

@@ -354,6 +420,10 @@ usb {
 		vcc5v0_host_en: vcc5v0-host-en {
 			rockchip,pins = <4 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
+
+		usbc0_int: usbc0-int {
+			rockchip,pins = <3 RK_PB4 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
 	};
 };

@@ -747,6 +817,14 @@ &uart2 {
 	status = "okay";
 };

+&u2phy0 {
+	status = "okay";
+};
+
+&u2phy0_otg {
+	status = "okay";
+};
+
 &u2phy1 {
 	status = "okay";
 };
@@ -778,6 +856,29 @@ &usbdp_phy1 {
 	status = "okay";
 };

+&usbdp_phy0 {
+	mode-switch;
+	orientation-switch;
+	sbu1-dc-gpios = <&gpio4 RK_PA6 GPIO_ACTIVE_HIGH>;
+	sbu2-dc-gpios = <&gpio4 RK_PA7 GPIO_ACTIVE_HIGH>;
+	status = "okay";
+
+	port {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		usbdp_phy0_typec_ss: endpoint@0 {
+			reg = <0>;
+			remote-endpoint = <&usbc0_ss>;
+		};
+
+		usbdp_phy0_typec_sbu: endpoint@1 {
+			reg = <1>;
+			remote-endpoint = <&usbc0_sbu>;
+		};
+	};
+};
+
 &usb_host0_ehci {
 	status = "okay";
 };
@@ -786,6 +887,20 @@ &usb_host0_ohci {
 	status = "okay";
 };

+&usb_host0_xhci {
+	usb-role-switch;
+	status = "okay";
+
+	port {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		usb_host0_xhci_drd_sw: endpoint {
+			remote-endpoint = <&usbc0_hs>;
+		};
+	};
+};
+
 &usb_host1_ehci {
 	status = "okay";
 };
--
2.42.0


From a72d8d6aae140e2e8c533bf92e041445eb6a8b39 Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Thu, 29 Feb 2024 23:26:32 +0400
Subject: [PATCH 34/69] arm64: dts: rockchip: enable built-in thermal
 monitoring on RK3588

Include thermal zones information in device tree for RK3588 variants.

This also enables the TSADC controller unconditionally on all boards
to ensure that thermal protections are in place via throttling and
emergency reset, once OPPs are added to enable CPU DVFS.

The default settings (using CRU as the emergency reset mechanism)
should work on all boards regardless of their wiring, as CRU resets
do not depend on any external components. Boards that have the TSHUT
signal wired to the reset line of the PMIC may opt to switch to GPIO
tshut mode instead (rockchip,hw-tshut-mode = <1>;)

It seems though that downstream kernels don't use that, even for
those boards where the wiring allows for GPIO based tshut, such as
Radxa Rock 5B [1], [2], [3]

[1] https://github.com/radxa/kernel/blob/stable-5.10-rock5/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts#L540
[2] https://github.com/radxa/kernel/blob/stable-5.10-rock5/arch/arm64/boot/dts/rockchip/rk3588s.dtsi#L5433
[3] https://dl.radxa.com/rock5/5b/docs/hw/radxa_rock_5b_v1423_sch.pdf page 11 (TSADC_SHUT_H)

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Link: https://lore.kernel.org/r/20240229-rk-dts-additions-v3-1-6afe8473a631@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 176 +++++++++++++++++++++-
 1 file changed, 175 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 4f2d7b3738ad..732d1ac54fe3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -10,6 +10,7 @@
 #include <dt-bindings/reset/rockchip,rk3588-cru.h>
 #include <dt-bindings/phy/phy.h>
 #include <dt-bindings/ata/ahci.h>
+#include <dt-bindings/thermal/thermal.h>

 / {
 	compatible = "rockchip,rk3588";
@@ -2348,7 +2349,180 @@ tsadc: tsadc@fec00000 {
 		pinctrl-1 = <&tsadc_shut>;
 		pinctrl-names = "gpio", "otpout";
 		#thermal-sensor-cells = <1>;
-		status = "disabled";
+		status = "okay";
+	};
+
+	thermal_zones: thermal-zones {
+		/* sensor near the center of the SoC */
+		package_thermal: package-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 0>;
+
+			trips {
+				package_crit: package-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		/* sensor between A76 cores 0 and 1 */
+		bigcore0_thermal: bigcore0-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 1>;
+
+			trips {
+				/* threshold to start collecting temperature
+				 * statistics e.g. with the IPA governor
+				 */
+				bigcore0_alert0: bigcore0-alert0 {
+					temperature = <75000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				/* actual control temperature */
+				bigcore0_alert1: bigcore0-alert1 {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore0_crit: bigcore0-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&bigcore0_alert1>;
+					cooling-device =
+						<&cpu_b0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor between A76 cores 2 and 3 */
+		bigcore2_thermal: bigcore2-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 2>;
+
+			trips {
+				/* threshold to start collecting temperature
+				 * statistics e.g. with the IPA governor
+				 */
+				bigcore2_alert0: bigcore2-alert0 {
+					temperature = <75000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				/* actual control temperature */
+				bigcore2_alert1: bigcore2-alert1 {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				bigcore2_crit: bigcore2-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&bigcore2_alert1>;
+					cooling-device =
+						<&cpu_b2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_b3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor between the four A55 cores */
+		little_core_thermal: littlecore-thermal {
+			polling-delay-passive = <100>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 3>;
+
+			trips {
+				/* threshold to start collecting temperature
+				 * statistics e.g. with the IPA governor
+				 */
+				littlecore_alert0: littlecore-alert0 {
+					temperature = <75000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				/* actual control temperature */
+				littlecore_alert1: littlecore-alert1 {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				littlecore_crit: littlecore-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+			cooling-maps {
+				map0 {
+					trip = <&littlecore_alert1>;
+					cooling-device =
+						<&cpu_l0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l2 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>,
+						<&cpu_l3 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+
+		/* sensor near the PD_CENTER power domain */
+		center_thermal: center-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 4>;
+
+			trips {
+				center_crit: center-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		gpu_thermal: gpu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 5>;
+
+			trips {
+				gpu_crit: gpu-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
+
+		npu_thermal: npu-thermal {
+			polling-delay-passive = <0>;
+			polling-delay = <0>;
+			thermal-sensors = <&tsadc 6>;
+
+			trips {
+				npu_crit: npu-crit {
+					temperature = <115000>;
+					hysteresis = <0>;
+					type = "critical";
+				};
+			};
+		};
 	};

 	saradc: adc@fec10000 {
--
2.42.0


From 52906e8870763fc2d41d2313f3dc2ba0dcb0e0a0 Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Thu, 29 Feb 2024 23:26:33 +0400
Subject: [PATCH 35/69] arm64: dts: rockchip: enable automatic active cooling
 on Rock 5B

This links the PWM fan on Radxa Rock 5B as an active cooling device
managed automatically by the thermal subsystem, with a target SoC
temperature of 65C and a minimum-spin interval from 55C to 65C to
ensure airflow when the system gets warm

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Helped-by: Dragan Simic <dsimic@manjaro.org>
Reviewed-by: Dragan Simic <dsimic@manjaro.org>
Link: https://lore.kernel.org/r/20240229-rk-dts-additions-v3-2-6afe8473a631@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../boot/dts/rockchip/rk3588-rock-5b.dts      | 30 ++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index 6fe97c12fc84..fa3b91613a24 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -53,7 +53,7 @@ led_rgb_b {

 	fan: pwm-fan {
 		compatible = "pwm-fan";
-		cooling-levels = <0 95 145 195 255>;
+		cooling-levels = <0 120 150 180 210 240 255>;
 		fan-supply = <&vcc5v0_sys>;
 		pwms = <&pwm1 0 50000 0>;
 		#cooling-cells = <2>;
@@ -345,6 +345,34 @@ i2s0_8ch_p0_0: endpoint {
 	};
 };

+&package_thermal {
+	polling-delay = <1000>;
+
+	trips {
+		package_fan0: package-fan0 {
+			hysteresis = <2000>;
+			temperature = <55000>;
+			type = "active";
+		};
+		package_fan1: package-fan1 {
+			hysteresis = <2000>;
+			temperature = <65000>;
+			type = "active";
+		};
+	};
+
+	cooling-maps {
+		map1 {
+			cooling-device = <&fan THERMAL_NO_LIMIT 1>;
+			trip = <&package_fan0>;
+		};
+		map2 {
+			cooling-device = <&fan 2 THERMAL_NO_LIMIT>;
+			trip = <&package_fan1>;
+		};
+	};
+};
+
 &pcie2x1l0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&pcie2_0_rst>;
--
2.42.0


From cd5e750244b1d3b71cb1ee1219f2fc0cebd486ce Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Thu, 29 Feb 2024 23:26:34 +0400
Subject: [PATCH 36/69] arm64: dts: rockchip: Add CPU/memory regulator coupling
 for RK3588

RK3588 chips allow for their CPU cores to be powered by a different
supply vs. their corresponding memory interfaces, and two of the
boards currently upstream do that (EVB1 and QuartzPro64).

The voltage of the memory interface though has to match that of the
CPU cores that use it, which downstream kernels achieve by the means
of a custom cpufreq driver which adjusts both at the same time.

It seems that regulator coupling is a more appropriate generic
interface for it, so this patch introduces coupling to affected
device trees to ensure that memory interface voltage is also updated
whenever cpufreq switches between CPU OPPs.

Note that other boards, such as Radxa Rock 5B, define both the CPU
and memory interface regulators as aliases to the same DT node, so
this doesn't apply there.

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Link: https://lore.kernel.org/r/20240229-rk-dts-additions-v3-3-6afe8473a631@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts    | 12 ++++++++++++
 arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index cb7990235f13..a88e6730093d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -875,6 +875,8 @@ regulators {
 			vdd_cpu_big1_s0: dcdc-reg1 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big1_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -887,6 +889,8 @@ regulator-state-mem {
 			vdd_cpu_big0_s0: dcdc-reg2 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big0_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -899,6 +903,8 @@ regulator-state-mem {
 			vdd_cpu_lit_s0: dcdc-reg3 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_lit_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
@@ -923,6 +929,8 @@ regulator-state-mem {
 			vdd_cpu_big1_mem_s0: dcdc-reg5 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big1_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -936,6 +944,8 @@ regulator-state-mem {
 			vdd_cpu_big0_mem_s0: dcdc-reg6 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big0_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -960,6 +970,8 @@ regulator-state-mem {
 			vdd_cpu_lit_mem_s0: dcdc-reg8 {
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_lit_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
index 67414d72e2b6..8ca79a7034e1 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-quartzpro64.dts
@@ -817,6 +817,8 @@ vdd_cpu_big1_s0: dcdc-reg1 {
 				regulator-name = "vdd_cpu_big1_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big1_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -830,6 +832,8 @@ vdd_cpu_big0_s0: dcdc-reg2 {
 				regulator-name = "vdd_cpu_big0_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big0_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -843,6 +847,8 @@ vdd_cpu_lit_s0: dcdc-reg3 {
 				regulator-name = "vdd_cpu_lit_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_lit_mem_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <550000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
@@ -869,6 +875,8 @@ vdd_cpu_big1_mem_s0: dcdc-reg5 {
 				regulator-name = "vdd_cpu_big1_mem_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big1_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -883,6 +891,8 @@ vdd_cpu_big0_mem_s0: dcdc-reg6 {
 				regulator-name = "vdd_cpu_big0_mem_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_big0_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <1050000>;
 				regulator-ramp-delay = <12500>;
@@ -909,6 +919,8 @@ vdd_cpu_lit_mem_s0: dcdc-reg8 {
 				regulator-name = "vdd_cpu_lit_mem_s0";
 				regulator-always-on;
 				regulator-boot-on;
+				regulator-coupled-with = <&vdd_cpu_lit_s0>;
+				regulator-coupled-max-spread = <10000>;
 				regulator-min-microvolt = <675000>;
 				regulator-max-microvolt = <950000>;
 				regulator-ramp-delay = <12500>;
--
2.42.0


From dca333f0e8f5ffd113730b394d56db9dd2a09be5 Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Thu, 29 Feb 2024 23:26:35 +0400
Subject: [PATCH 37/69] arm64: dts: rockchip: Add OPP data for CPU cores on
 RK3588

By default the CPUs on RK3588 start up in a conservative performance
mode. Add frequency and voltage mappings to the device tree to enable
dynamic scaling via cpufreq.

OPP values are adapted from Radxa's downstream kernel for Rock 5B [1],
stripping them down to the minimum frequency and voltage combinations
as expected by the generic upstream cpufreq-dt driver, and also dropping
those OPPs that don't differ in voltage but only in frequency (keeping
the top frequency OPP in each case).

Note that this patch ignores voltage scaling for the CPU memory
interface which the downstream kernel does through a custom cpufreq
driver, and which is why the downstream version has two sets of voltage
values for each OPP (the second one being meant for the memory
interface supply regulator). This is done instead via regulator
coupling between CPU and memory interface supplies on affected boards.

This has been tested on Rock 5B with u-boot 2023.11 compiled from
Collabora's integration tree [2] with binary bl31 and appears to be
stable both under active cooling and passive cooling (with throttling)

[1] https://github.com/radxa/kernel/blob/stable-5.10-rock5/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
[2] https://gitlab.collabora.com/hardware-enablement/rockchip-3588/u-boot

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Link: https://lore.kernel.org/r/20240229-rk-dts-additions-v3-4-6afe8473a631@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 122 ++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 732d1ac54fe3..5cf59e0220b7 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -97,6 +97,7 @@ cpu_l0: cpu@0 {
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -116,6 +117,7 @@ cpu_l1: cpu@100 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -135,6 +137,7 @@ cpu_l2: cpu@200 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -154,6 +157,7 @@ cpu_l3: cpu@300 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <530>;
 			clocks = <&scmi_clk SCMI_CLK_CPUL>;
+			operating-points-v2 = <&cluster0_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <32768>;
 			i-cache-line-size = <64>;
@@ -175,6 +179,7 @@ cpu_b0: cpu@400 {
 			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster1_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -194,6 +199,7 @@ cpu_b1: cpu@500 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <1024>;
 			clocks = <&scmi_clk SCMI_CLK_CPUB01>;
+			operating-points-v2 = <&cluster1_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -215,6 +221,7 @@ cpu_b2: cpu@600 {
 			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
 			assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>;
 			assigned-clock-rates = <816000000>;
+			operating-points-v2 = <&cluster2_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -234,6 +241,7 @@ cpu_b3: cpu@700 {
 			enable-method = "psci";
 			capacity-dmips-mhz = <1024>;
 			clocks = <&scmi_clk SCMI_CLK_CPUB23>;
+			operating-points-v2 = <&cluster2_opp_table>;
 			cpu-idle-states = <&CPU_SLEEP>;
 			i-cache-size = <65536>;
 			i-cache-line-size = <64>;
@@ -348,6 +356,120 @@ l3_cache: l3-cache {
 		};
 	};

+	cluster0_opp_table: opp-table-cluster0 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <712500 712500 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <762500 762500 950000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <850000 850000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <950000 950000 950000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
+	cluster1_opp_table: opp-table-cluster1 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <725000 725000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <762500 762500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <850000 850000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2016000000 {
+			opp-hz = /bits/ 64 <2016000000>;
+			opp-microvolt = <925000 925000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2208000000 {
+			opp-hz = /bits/ 64 <2208000000>;
+			opp-microvolt = <987500 987500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2400000000 {
+			opp-hz = /bits/ 64 <2400000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
+	cluster2_opp_table: opp-table-cluster2 {
+		compatible = "operating-points-v2";
+		opp-shared;
+
+		opp-1200000000 {
+			opp-hz = /bits/ 64 <1200000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1416000000 {
+			opp-hz = /bits/ 64 <1416000000>;
+			opp-microvolt = <725000 725000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1608000000 {
+			opp-hz = /bits/ 64 <1608000000>;
+			opp-microvolt = <762500 762500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1800000000 {
+			opp-hz = /bits/ 64 <1800000000>;
+			opp-microvolt = <850000 850000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2016000000 {
+			opp-hz = /bits/ 64 <2016000000>;
+			opp-microvolt = <925000 925000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2208000000 {
+			opp-hz = /bits/ 64 <2208000000>;
+			opp-microvolt = <987500 987500 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2400000000 {
+			opp-hz = /bits/ 64 <2400000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+	};
+
 	firmware {
 		optee: optee {
 			compatible = "linaro,optee-tz";
--
2.42.0


From 4138742559dc6cff89a4b161d2da21d56d4dd0f3 Mon Sep 17 00:00:00 2001
From: Alexey Charkov <alchark@gmail.com>
Date: Thu, 29 Feb 2024 23:26:36 +0400
Subject: [PATCH 38/69] arm64: dts: rockchip: Add further granularity in RK3588
 CPU OPPs

This introduces additional OPPs that share the same voltage as
another OPP already present in the .dtsi but with lower frequency.

The idea is to try and limit system throughput more gradually upon
reaching the throttling condition for workloads that are close to
sustainable power already, thus avoiding needless performance loss.

My limited synthetic benchmarking [1] showed around 3.8% performance
benefit when these are in place, other things equal (not meant to
be comprehensive). Though dmesg complains about these OPPs being
'inefficient':

[    9.009561] cpu cpu0: EM: OPP:816000 is inefficient
[    9.009580] cpu cpu0: EM: OPP:600000 is inefficient
[    9.009591] cpu cpu0: EM: OPP:408000 is inefficient
[    9.011370] cpu cpu4: EM: OPP:2352000 is inefficient
[    9.011379] cpu cpu4: EM: OPP:2304000 is inefficient
[    9.011384] cpu cpu4: EM: OPP:2256000 is inefficient
[    9.011389] cpu cpu4: EM: OPP:600000 is inefficient
[    9.011393] cpu cpu4: EM: OPP:408000 is inefficient
[    9.012978] cpu cpu6: EM: OPP:2352000 is inefficient
[    9.012987] cpu cpu6: EM: OPP:2304000 is inefficient
[    9.012992] cpu cpu6: EM: OPP:2256000 is inefficient
[    9.012996] cpu cpu6: EM: OPP:600000 is inefficient
[    9.013000] cpu cpu6: EM: OPP:408000 is inefficient

[1] https://lore.kernel.org/linux-rockchip/CABjd4YxqarUCbZ-a2XLe3TWJ-qjphGkyq=wDnctnEhdoSdPPpw@mail.gmail.com/T/#me92aa0ee25e6eeb1d1501ce85f5af4e58b3b13c5

Signed-off-by: Alexey Charkov <alchark@gmail.com>
Link: https://lore.kernel.org/r/20240229-rk-dts-additions-v3-5-6afe8473a631@gmail.com
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 87 +++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 5cf59e0220b7..d0d47ac2e3cc 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -360,6 +360,21 @@ cluster0_opp_table: opp-table-cluster0 {
 		compatible = "operating-points-v2";
 		opp-shared;

+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 950000>;
+			clock-latency-ns = <40000>;
+		};
 		opp-1008000000 {
 			opp-hz = /bits/ 64 <1008000000>;
 			opp-microvolt = <675000 675000 950000>;
@@ -392,6 +407,27 @@ cluster1_opp_table: opp-table-cluster1 {
 		compatible = "operating-points-v2";
 		opp-shared;

+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
 		opp-1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <675000 675000 1000000>;
@@ -422,6 +458,21 @@ opp-2208000000 {
 			opp-microvolt = <987500 987500 1000000>;
 			clock-latency-ns = <40000>;
 		};
+		opp-2256000000 {
+			opp-hz = /bits/ 64 <2256000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2304000000 {
+			opp-hz = /bits/ 64 <2304000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2352000000 {
+			opp-hz = /bits/ 64 <2352000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
 		opp-2400000000 {
 			opp-hz = /bits/ 64 <2400000000>;
 			opp-microvolt = <1000000 1000000 1000000>;
@@ -433,6 +484,27 @@ cluster2_opp_table: opp-table-cluster2 {
 		compatible = "operating-points-v2";
 		opp-shared;

+		opp-408000000 {
+			opp-hz = /bits/ 64 <408000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+			opp-suspend;
+		};
+		opp-600000000 {
+			opp-hz = /bits/ 64 <600000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-816000000 {
+			opp-hz = /bits/ 64 <816000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-1008000000 {
+			opp-hz = /bits/ 64 <1008000000>;
+			opp-microvolt = <675000 675000 1000000>;
+			clock-latency-ns = <40000>;
+		};
 		opp-1200000000 {
 			opp-hz = /bits/ 64 <1200000000>;
 			opp-microvolt = <675000 675000 1000000>;
@@ -463,6 +535,21 @@ opp-2208000000 {
 			opp-microvolt = <987500 987500 1000000>;
 			clock-latency-ns = <40000>;
 		};
+		opp-2256000000 {
+			opp-hz = /bits/ 64 <2256000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2304000000 {
+			opp-hz = /bits/ 64 <2304000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
+		opp-2352000000 {
+			opp-hz = /bits/ 64 <2352000000>;
+			opp-microvolt = <1000000 1000000 1000000>;
+			clock-latency-ns = <40000>;
+		};
 		opp-2400000000 {
 			opp-hz = /bits/ 64 <2400000000>;
 			opp-microvolt = <1000000 1000000 1000000>;
--
2.42.0


From 80eeace4afb821446afd76442705245763343529 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Fri, 14 Jul 2023 17:38:24 +0200
Subject: [PATCH 39/69] [BROKEN] arm64: dts: rockchip: rk3588-evb1: add PCIe2
 WLAN controller

Enable PCIe bus used by on-board PCIe Broadcom WLAN controller.

The WLAN controller is not detected. There are two reasons for
that.

First of all it is necessary to keep the HYM8563 clock enabled, but it
is disabled because the kernel does not know about the dependency and
disables any clocks it deems unused.

Apart from that it looks like the controller needs a long time to be
properly initialized. So detection only works when rescanning the bus
some time after boot. This still needs to be investigated.

Both of these issues should be fixable by implementing a pwrseq driver
once the following series has landed:

https://lore.kernel.org/all/20240104130123.37115-1-brgl@bgdev.pl/

In addition to the above issues, the mainline brcmfmac driver does
not yet support the chip version used by AP6275P.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../boot/dts/rockchip/rk3588-evb1-v10.dts     | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index a88e6730093d..2490be5a5e18 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -120,6 +120,15 @@ backlight: backlight {
 		pwms = <&pwm2 0 25000 0>;
 	};

+	wlan-rfkill {
+		compatible = "rfkill-gpio";
+		label = "rfkill-pcie-wlan";
+		radio-type = "wlan";
+		shutdown-gpios = <&gpio3 RK_PB1 GPIO_ACTIVE_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&wifi_pwren>;
+	};
+
 	pcie20_avdd0v85: pcie20-avdd0v85-regulator {
 		compatible = "regulator-fixed";
 		regulator-name = "pcie20_avdd0v85";
@@ -237,12 +246,36 @@ vbus5v0_typec: vbus5v0-typec {
 		pinctrl-names = "default";
 		pinctrl-0 = <&typec5v_pwren>;
 	};
+
+	vccio_wl: vccio-wl {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-vddio";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc_1v8_s0>;
+	};
+
+	vcc3v3_pciewl_vbat: vcc3v3-pciewl-vbat {
+		compatible = "regulator-fixed";
+		regulator-name = "wlan-vbat";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-always-on;
+		regulator-boot-on;
+		vin-supply = <&vcc_3v3_s0>;
+	};
 };

 &combphy0_ps {
 	status = "okay";
 };

+&combphy1_ps {
+	status = "okay";
+};
+
 &combphy2_psu {
 	status = "okay";
 };
@@ -405,6 +438,12 @@ rgmii_phy: ethernet-phy@1 {
 	};
 };

+&pcie2x1l0 {
+	reset-gpios = <&gpio4 RK_PA5 GPIO_ACTIVE_HIGH>;
+	pinctrl-0 = <&pcie2_0_rst>, <&pcie2_0_wake>, <&pcie2_0_clkreq>, <&wifi_host_wake_irq>;
+	status = "okay";
+};
+
 &pcie2x1l1 {
 	reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
 	pinctrl-names = "default";
@@ -459,6 +498,18 @@ hym8563_int: hym8563-int {
 	};

 	pcie2 {
+		pcie2_0_rst: pcie2-0-rst {
+			rockchip,pins = <4 RK_PA5 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+
+		pcie2_0_wake: pcie2-0-wake {
+			rockchip,pins = <4 RK_PA4 4 &pcfg_pull_none>;
+		};
+
+		pcie2_0_clkreq: pcie2-0-clkreq {
+			rockchip,pins = <4 RK_PA3 4 &pcfg_pull_none>;
+		};
+
 		pcie2_1_rst: pcie2-1-rst {
 			rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
@@ -489,6 +540,16 @@ typec5v_pwren: typec5v-pwren {
 			rockchip,pins = <4 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
 	};
+
+	wlan {
+		wifi_host_wake_irq: wifi-host-wake-irq {
+			rockchip,pins = <3 RK_PA7 RK_FUNC_GPIO &pcfg_pull_down>;
+		};
+
+		wifi_pwren: wifi-pwren {
+			rockchip,pins = <3 RK_PB1 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
 };

 &pwm2 {
--
2.42.0


From a462a0a7cc92d80933c5e839d003a9fe1a33de58 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 11 Mar 2024 18:05:34 +0100
Subject: [PATCH 40/69] clk: rockchip: rk3588: drop unused code

All clocks are registered early using CLK_OF_DECLARE(), which marks
the DT node as processed. For the processed DT node the probe routine
is never called. Thus this whole code is never executed. This could
be "fixed" by using CLK_OF_DECLARE_DRIVER, which avoids marking the
DT node as processed. But then the probe routine would re-register
all the clocks by calling rk3588_clk_init() again.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk-rk3588.c | 40 -------------------------------
 1 file changed, 40 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index b30279a96dc8..74051277ecea 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -2502,43 +2502,3 @@ static void __init rk3588_clk_init(struct device_node *np)
 }

 CLK_OF_DECLARE(rk3588_cru, "rockchip,rk3588-cru", rk3588_clk_init);
-
-struct clk_rk3588_inits {
-	void (*inits)(struct device_node *np);
-};
-
-static const struct clk_rk3588_inits clk_3588_cru_init = {
-	.inits = rk3588_clk_init,
-};
-
-static const struct of_device_id clk_rk3588_match_table[] = {
-	{
-		.compatible = "rockchip,rk3588-cru",
-		.data = &clk_3588_cru_init,
-	},
-	{ }
-};
-
-static int __init clk_rk3588_probe(struct platform_device *pdev)
-{
-	const struct clk_rk3588_inits *init_data;
-	struct device *dev = &pdev->dev;
-
-	init_data = device_get_match_data(dev);
-	if (!init_data)
-		return -EINVAL;
-
-	if (init_data->inits)
-		init_data->inits(dev->of_node);
-
-	return 0;
-}
-
-static struct platform_driver clk_rk3588_driver = {
-	.driver		= {
-		.name	= "clk-rk3588",
-		.of_match_table = clk_rk3588_match_table,
-		.suppress_bind_attrs = true,
-	},
-};
-builtin_platform_driver_probe(clk_rk3588_driver, clk_rk3588_probe);
--
2.42.0


From cdd955afae38ea95dcedd49b31c06f30acb8b4c1 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Fri, 8 Mar 2024 23:19:55 +0100
Subject: [PATCH 41/69] clk: rockchip: handle missing clocks with -EPROBE_DEFER

In the future some clocks will be registered using CLK_OF_DECLARE
and some are registered later from the driver probe routine. Any
clock handled by the probe routine should return -EPROBE_DEFER
until that routine has been called.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 73d2cbdc716b..31b7cc243d82 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -376,7 +376,7 @@ struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np,
 		goto err_free;

 	for (i = 0; i < nr_clks; ++i)
-		clk_table[i] = ERR_PTR(-ENOENT);
+		clk_table[i] = ERR_PTR(-EPROBE_DEFER);

 	ctx->reg_base = base;
 	ctx->clk_data.clks = clk_table;
--
2.42.0


From 35cfb6b6ae66c578fba40a578ee2840821c352bb Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 12 Mar 2024 16:12:18 +0100
Subject: [PATCH 42/69] clk: rockchip: rk3588: register GATE_LINK later

The proper GATE_LINK implementation will use runtime PM to handle the
linked gate clocks, which requires device context. Currently all clocks
are registered early via CLK_OF_DECLARE, which is before the kernel
knows about devices.

Moving the full clocks registration to the probe routine does not work,
since the clocks needed for timers must be registered early.

To work around this issue, most of the clock tree is registered early,
but GATE_LINK clocks are handled in the probe routine. Since the resets
are not needed early either, they have also been moved to the probe
routine.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk-rk3588.c | 64 +++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 74051277ecea..29f31a5526fa 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -266,6 +266,8 @@ static struct rockchip_pll_rate_table rk3588_pll_rates[] = {
 	},							\
 }

+static struct rockchip_clk_provider *early_ctx;
+
 static struct rockchip_cpuclk_rate_table rk3588_cpub0clk_rates[] __initdata = {
 	RK3588_CPUB01CLK_RATE(2496000000, 1),
 	RK3588_CPUB01CLK_RATE(2400000000, 1),
@@ -694,7 +696,7 @@ static struct rockchip_pll_clock rk3588_pll_clks[] __initdata = {
 		     RK3588_MODE_CON0, 10, 15, 0, rk3588_pll_rates),
 };

-static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
+static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 	/*
 	 * CRU Clock-Architecture
 	 */
@@ -2428,7 +2430,9 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
 			RK3588_CLKGATE_CON(68), 5, GFLAGS),
 	GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0,
 			RK3588_CLKGATE_CON(68), 2, GFLAGS),
+};

+static struct rockchip_clk_branch rk3588_clk_branches[] = {
 	GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", ACLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 6, GFLAGS),
 	GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", HCLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 8, GFLAGS),
 	GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS),
@@ -2453,14 +2457,18 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = {
 	GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", HCLK_VO1, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS),
 };

-static void __init rk3588_clk_init(struct device_node *np)
+static void __init rk3588_clk_early_init(struct device_node *np)
 {
 	struct rockchip_clk_provider *ctx;
-	unsigned long clk_nr_clks;
+	unsigned long clk_nr_clks, max_clk_id1, max_clk_id2;
 	void __iomem *reg_base;

-	clk_nr_clks = rockchip_clk_find_max_clk_id(rk3588_clk_branches,
-					ARRAY_SIZE(rk3588_clk_branches)) + 1;
+	max_clk_id1 = rockchip_clk_find_max_clk_id(rk3588_clk_branches,
+					ARRAY_SIZE(rk3588_clk_branches));
+	max_clk_id2 = rockchip_clk_find_max_clk_id(rk3588_early_clk_branches,
+					ARRAY_SIZE(rk3588_early_clk_branches));
+	clk_nr_clks = max(max_clk_id1, max_clk_id2) + 1;
+
 	reg_base = of_iomap(np, 0);
 	if (!reg_base) {
 		pr_err("%s: could not map cru region\n", __func__);
@@ -2473,6 +2481,7 @@ static void __init rk3588_clk_init(struct device_node *np)
 		iounmap(reg_base);
 		return;
 	}
+	early_ctx = ctx;

 	rockchip_clk_register_plls(ctx, rk3588_pll_clks,
 				   ARRAY_SIZE(rk3588_pll_clks),
@@ -2491,14 +2500,53 @@ static void __init rk3588_clk_init(struct device_node *np)
 			&rk3588_cpub1clk_data, rk3588_cpub1clk_rates,
 			ARRAY_SIZE(rk3588_cpub1clk_rates));

+	rockchip_clk_register_branches(ctx, rk3588_early_clk_branches,
+				       ARRAY_SIZE(rk3588_early_clk_branches));
+
+	rockchip_clk_of_add_provider(np, ctx);
+}
+CLK_OF_DECLARE_DRIVER(rk3588_cru, "rockchip,rk3588-cru", rk3588_clk_early_init);
+
+static int clk_rk3588_probe(struct platform_device *pdev)
+{
+	struct rockchip_clk_provider *ctx = early_ctx;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+
 	rockchip_clk_register_branches(ctx, rk3588_clk_branches,
 				       ARRAY_SIZE(rk3588_clk_branches));

-	rk3588_rst_init(np, reg_base);
-
+	rk3588_rst_init(np, ctx->reg_base);
 	rockchip_register_restart_notifier(ctx, RK3588_GLB_SRST_FST, NULL);

+	/*
+	 * Re-add clock provider, so that the newly added clocks are also
+	 * re-parented and get their defaults configured.
+	 */
+	of_clk_del_provider(np);
 	rockchip_clk_of_add_provider(np, ctx);
+
+	return 0;
 }

-CLK_OF_DECLARE(rk3588_cru, "rockchip,rk3588-cru", rk3588_clk_init);
+static const struct of_device_id clk_rk3588_match_table[] = {
+	{
+		.compatible = "rockchip,rk3588-cru",
+	},
+	{ }
+};
+
+static struct platform_driver clk_rk3588_driver = {
+	.probe		= clk_rk3588_probe,
+	.driver		= {
+		.name	= "clk-rk3588",
+		.of_match_table = clk_rk3588_match_table,
+		.suppress_bind_attrs = true,
+	},
+};
+
+static int __init rockchip_clk_rk3588_drv_register(void)
+{
+	return platform_driver_register(&clk_rk3588_driver);
+}
+core_initcall(rockchip_clk_rk3588_drv_register);
--
2.42.0


From 3255eb4a0dff01ef455b3bd4573d291bdbde7ec6 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 25 Mar 2024 19:29:22 +0100
Subject: [PATCH 43/69] clk: rockchip: expose rockchip_clk_set_lookup

Move rockchip_clk_add_lookup to clk.h, so that it can be used
by sub-devices with their own driver. These might also have to
do a lookup, so rename the function to rockchip_clk_set_lookup
and add a matching rockchip_clk_add_lookup.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk.c | 14 ++++----------
 drivers/clk/rockchip/clk.h | 12 ++++++++++++
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 31b7cc243d82..ef2408f10f39 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -197,12 +197,6 @@ static void rockchip_fractional_approximation(struct clk_hw *hw,
 	clk_fractional_divider_general_approximation(hw, rate, parent_rate, m, n);
 }

-static void rockchip_clk_add_lookup(struct rockchip_clk_provider *ctx,
-				    struct clk *clk, unsigned int id)
-{
-	ctx->clk_data.clks[id] = clk;
-}
-
 static struct clk *rockchip_clk_register_frac_branch(
 		struct rockchip_clk_provider *ctx, const char *name,
 		const char *const *parent_names, u8 num_parents,
@@ -292,7 +286,7 @@ static struct clk *rockchip_clk_register_frac_branch(
 			return mux_clk;
 		}

-		rockchip_clk_add_lookup(ctx, mux_clk, child->id);
+		rockchip_clk_set_lookup(ctx, mux_clk, child->id);

 		/* notifier on the fraction divider to catch rate changes */
 		if (frac->mux_frac_idx >= 0) {
@@ -424,7 +418,7 @@ void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx,
 			continue;
 		}

-		rockchip_clk_add_lookup(ctx, clk, list->id);
+		rockchip_clk_set_lookup(ctx, clk, list->id);
 	}
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_register_plls);
@@ -585,7 +579,7 @@ void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 			continue;
 		}

-		rockchip_clk_add_lookup(ctx, clk, list->id);
+		rockchip_clk_set_lookup(ctx, clk, list->id);
 	}
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_register_branches);
@@ -609,7 +603,7 @@ void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx,
 		return;
 	}

-	rockchip_clk_add_lookup(ctx, clk, lookup_id);
+	rockchip_clk_set_lookup(ctx, clk, lookup_id);
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_register_armclk);

diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index fd3b476dedda..e39392e1c2a2 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -969,6 +969,18 @@ struct rockchip_clk_branch {
 #define SGRF_GATE(_id, cname, pname)				\
 		FACTOR(_id, cname, pname, 0, 1, 1)

+static inline struct clk *rockchip_clk_get_lookup(struct rockchip_clk_provider *ctx,
+						  unsigned int id)
+{
+	return ctx->clk_data.clks[id];
+}
+
+static inline void rockchip_clk_set_lookup(struct rockchip_clk_provider *ctx,
+					   struct clk *clk, unsigned int id)
+{
+	ctx->clk_data.clks[id] = clk;
+}
+
 struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np,
 			void __iomem *base, unsigned long nr_clks);
 void rockchip_clk_of_add_provider(struct device_node *np,
--
2.42.0


From f325e300087ba9cb084bc4bcd915355f9159345e Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 25 Mar 2024 19:31:59 +0100
Subject: [PATCH 44/69] clk: rockchip: fix error for unknown clocks

There is a clk == NULL check after the switch to check for
unsupported clk types. Since clk is re-assigned in a loop,
this check is useless right now for anything but the first
round. Let's fix this up by assigning clk = NULL in the
loop before the switch statement.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index ef2408f10f39..e150bc1fc319 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -444,12 +444,13 @@ void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 				    struct rockchip_clk_branch *list,
 				    unsigned int nr_clk)
 {
-	struct clk *clk = NULL;
+	struct clk *clk;
 	unsigned int idx;
 	unsigned long flags;

 	for (idx = 0; idx < nr_clk; idx++, list++) {
 		flags = list->flags;
+		clk = NULL;

 		/* catch simple muxes */
 		switch (list->branch_type) {
--
2.42.0


From 3dfff9b3944a5ba3a1add3fb5167846d1fb5075e Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 25 Mar 2024 19:42:07 +0100
Subject: [PATCH 45/69] clk: rockchip: implement linked gate clock support

Recent Rockchip SoCs have a new hardware block called Native Interface
Unit (NIU), which gates clocks to devices behind them. These clock
gates will only have a running output clock when all of the following
conditions are met:

1. the parent clock is enabled
2. the enable bit is set correctly
3. the linked clock is enabled

To handle them this code registers them as a normal gate type clock,
which takes care of condition 1 + 2. The linked clock is handled by
using runtime PM clocks. Handling it via runtime PM requires setting
up a struct device for each of these clocks with a driver attached
to use the correct runtime PM operations. Thus the complete handling
of these clocks has been moved into its own driver.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/Makefile     |  1 +
 drivers/clk/rockchip/clk-rk3588.c | 23 +------
 drivers/clk/rockchip/clk.c        | 52 ++++++++++++++++
 drivers/clk/rockchip/clk.h        | 25 ++++++++
 drivers/clk/rockchip/gate-link.c  | 99 +++++++++++++++++++++++++++++++
 5 files changed, 179 insertions(+), 21 deletions(-)

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index 36894f6a7022..179be95c6ffb 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile
@@ -13,6 +13,7 @@ clk-rockchip-y += clk-inverter.o
 clk-rockchip-y += clk-mmc-phase.o
 clk-rockchip-y += clk-muxgrf.o
 clk-rockchip-y += clk-ddr.o
+clk-rockchip-y += gate-link.o
 clk-rockchip-$(CONFIG_RESET_CONTROLLER) += softrst.o

 obj-$(CONFIG_CLK_PX30)          += clk-px30.o
diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 29f31a5526fa..1bf84ac44e85 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -12,25 +12,6 @@
 #include <dt-bindings/clock/rockchip,rk3588-cru.h>
 #include "clk.h"

-/*
- * Recent Rockchip SoCs have a new hardware block called Native Interface
- * Unit (NIU), which gates clocks to devices behind them. These effectively
- * need two parent clocks.
- *
- * Downstream enables the linked clock via runtime PM whenever the gate is
- * enabled. This implementation uses separate clock nodes for each of the
- * linked gate clocks, which leaks parts of the clock tree into DT.
- *
- * The GATE_LINK macro instead takes the second parent via 'linkname', but
- * ignores the information. Once the clock framework is ready to handle it, the
- * information should be passed on here. But since these clocks are required to
- * access multiple relevant IP blocks, such as PCIe or USB, we mark all linked
- * clocks critical until a better solution is available. This will waste some
- * power, but avoids leaking implementation details into DT or hanging the
- * system.
- */
-#define GATE_LINK(_id, cname, pname, linkedclk, f, o, b, gf) \
-	GATE(_id, cname, pname, f, o, b, gf)
 #define RK3588_LINKED_CLK		CLK_IS_CRITICAL


@@ -2513,8 +2494,8 @@ static int clk_rk3588_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;

-	rockchip_clk_register_branches(ctx, rk3588_clk_branches,
-				       ARRAY_SIZE(rk3588_clk_branches));
+	rockchip_clk_register_late_branches(dev, ctx, rk3588_clk_branches,
+					    ARRAY_SIZE(rk3588_clk_branches));

 	rk3588_rst_init(np, ctx->reg_base);
 	rockchip_register_restart_notifier(ctx, RK3588_GLB_SRST_FST, NULL);
diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index e150bc1fc319..f5f11cc60046 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c
@@ -19,6 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/reboot.h>

@@ -440,6 +441,29 @@ unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list,
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_find_max_clk_id);

+static struct platform_device *rockchip_clk_register_gate_link(
+		struct device *parent_dev,
+		struct rockchip_clk_provider *ctx,
+		struct rockchip_clk_branch *clkbr)
+{
+	struct rockchip_gate_link_platdata gate_link_pdata = {
+		.ctx = ctx,
+		.clkbr = clkbr,
+	};
+
+	struct platform_device_info pdevinfo = {
+		.parent = parent_dev,
+		.name = "rockchip-gate-link-clk",
+		.id = clkbr->id,
+		.fwnode = dev_fwnode(parent_dev),
+		.of_node_reused = true,
+		.data = &gate_link_pdata,
+		.size_data = sizeof(gate_link_pdata),
+	};
+
+	return platform_device_register_full(&pdevinfo);
+}
+
 void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 				    struct rockchip_clk_branch *list,
 				    unsigned int nr_clk)
@@ -565,6 +589,9 @@ void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 				list->div_width, list->div_flags,
 				ctx->reg_base, &ctx->lock);
 			break;
+		case branch_linked_gate:
+			/* must be registered late, fall-through for error message */
+			break;
 		}

 		/* none of the cases above matched */
@@ -585,6 +612,31 @@ void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 }
 EXPORT_SYMBOL_GPL(rockchip_clk_register_branches);

+void rockchip_clk_register_late_branches(struct device *dev,
+					 struct rockchip_clk_provider *ctx,
+					 struct rockchip_clk_branch *list,
+					 unsigned int nr_clk)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < nr_clk; idx++, list++) {
+		struct platform_device *pdev = NULL;
+
+		switch (list->branch_type) {
+		case branch_linked_gate:
+			pdev = rockchip_clk_register_gate_link(dev, ctx, list);
+			break;
+		default:
+			dev_err(dev, "unknown clock type %d\n", list->branch_type);
+			break;
+		}
+
+		if (!pdev)
+			dev_err(dev, "failed to register device for clock %s\n", list->name);
+	}
+}
+EXPORT_SYMBOL_GPL(rockchip_clk_register_late_branches);
+
 void rockchip_clk_register_armclk(struct rockchip_clk_provider *ctx,
 				  unsigned int lookup_id,
 				  const char *name, const char *const *parent_names,
diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index e39392e1c2a2..15aa2fd5265b 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h
@@ -517,6 +517,7 @@ enum rockchip_clk_branch_type {
 	branch_divider,
 	branch_fraction_divider,
 	branch_gate,
+	branch_linked_gate,
 	branch_mmc,
 	branch_inverter,
 	branch_factor,
@@ -544,6 +545,7 @@ struct rockchip_clk_branch {
 	int				gate_offset;
 	u8				gate_shift;
 	u8				gate_flags;
+	unsigned int			linked_clk_id;
 	struct rockchip_clk_branch	*child;
 };

@@ -842,6 +844,20 @@ struct rockchip_clk_branch {
 		.gate_flags	= gf,				\
 	}

+#define GATE_LINK(_id, cname, pname, linkedclk, f, o, b, gf)	\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_linked_gate,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.linked_clk_id	= linkedclk,			\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.gate_offset	= o,				\
+		.gate_shift	= b,				\
+		.gate_flags	= gf,				\
+	}
+
 #define MMC(_id, cname, pname, offset, shift)			\
 	{							\
 		.id		= _id,				\
@@ -981,6 +997,11 @@ static inline void rockchip_clk_set_lookup(struct rockchip_clk_provider *ctx,
 	ctx->clk_data.clks[id] = clk;
 }

+struct rockchip_gate_link_platdata {
+	struct rockchip_clk_provider *ctx;
+	struct rockchip_clk_branch *clkbr;
+};
+
 struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np,
 			void __iomem *base, unsigned long nr_clks);
 void rockchip_clk_of_add_provider(struct device_node *np,
@@ -990,6 +1011,10 @@ unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list,
 void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx,
 				    struct rockchip_clk_branch *list,
 				    unsigned int nr_clk);
+void rockchip_clk_register_late_branches(struct device *dev,
+					 struct rockchip_clk_provider *ctx,
+					 struct rockchip_clk_branch *list,
+					 unsigned int nr_clk);
 void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx,
 				struct rockchip_pll_clock *pll_list,
 				unsigned int nr_pll, int grf_lock_offset);
diff --git a/drivers/clk/rockchip/gate-link.c b/drivers/clk/rockchip/gate-link.c
new file mode 100644
index 000000000000..47b6f3e7a6a2
--- /dev/null
+++ b/drivers/clk/rockchip/gate-link.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2024 Collabora Ltd.
+ * Author: Sebastian Reichel <sebastian.reichel@collabora.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/pm_clock.h>
+#include <linux/pm_runtime.h>
+#include <linux/property.h>
+#include "clk.h"
+
+static int rk_clk_gate_link_register(struct device *dev,
+				     struct rockchip_clk_provider *ctx,
+				     struct rockchip_clk_branch *clkbr)
+{
+	unsigned long flags = clkbr->flags | CLK_SET_RATE_PARENT;
+	struct clk *clk;
+
+	clk = clk_register_gate(dev, clkbr->name, clkbr->parent_names[0],
+				flags, ctx->reg_base + clkbr->gate_offset,
+				clkbr->gate_shift, clkbr->gate_flags,
+				&ctx->lock);
+
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	rockchip_clk_set_lookup(ctx, clk, clkbr->id);
+	return 0;
+}
+
+static int rk_clk_gate_link_probe(struct platform_device *pdev)
+{
+	struct rockchip_gate_link_platdata *pdata;
+	struct device *dev = &pdev->dev;
+	struct clk *linked_clk;
+	int ret;
+
+	pdata = dev_get_platdata(dev);
+	if (!pdata)
+		return dev_err_probe(dev, -ENODEV, "missing platform data");
+
+	ret = devm_pm_runtime_enable(dev);
+	if (ret)
+		return ret;
+
+	ret = devm_pm_clk_create(dev);
+	if (ret)
+		return ret;
+
+	linked_clk = rockchip_clk_get_lookup(pdata->ctx, pdata->clkbr->linked_clk_id);
+	ret = pm_clk_add_clk(dev, linked_clk);
+	if (ret)
+		return ret;
+
+	ret = rk_clk_gate_link_register(dev, pdata->ctx, pdata->clkbr);
+	if (ret)
+		goto err;
+
+	return 0;
+
+err:
+	pm_clk_remove_clk(dev, linked_clk);
+	return ret;
+}
+
+static void rk_clk_gate_link_remove(struct platform_device *pdev)
+{
+	struct rockchip_gate_link_platdata *pdata;
+	struct device *dev = &pdev->dev;
+	struct clk *clk, *linked_clk;
+
+	pdata = dev_get_platdata(dev);
+	clk = rockchip_clk_get_lookup(pdata->ctx, pdata->clkbr->id);
+	linked_clk = rockchip_clk_get_lookup(pdata->ctx, pdata->clkbr->linked_clk_id);
+	rockchip_clk_set_lookup(pdata->ctx, ERR_PTR(-ENODEV), pdata->clkbr->id);
+	clk_unregister_gate(clk);
+	pm_clk_remove_clk(dev, linked_clk);
+}
+
+static const struct dev_pm_ops rk_clk_gate_link_pm_ops = {
+	SET_RUNTIME_PM_OPS(pm_clk_suspend, pm_clk_resume, NULL)
+};
+
+struct platform_driver rk_clk_gate_link_driver = {
+	.probe		= rk_clk_gate_link_probe,
+	.remove_new	= rk_clk_gate_link_remove,
+	.driver		= {
+		.name	= "rockchip-gate-link-clk",
+		.pm = &rk_clk_gate_link_pm_ops,
+	},
+};
+
+static int __init rk_clk_gate_link_drv_register(void)
+{
+	return platform_driver_register(&rk_clk_gate_link_driver);
+}
+core_initcall(rk_clk_gate_link_drv_register);
--
2.42.0


From a47e4f23e9c39b1c11314cacdfaaa1aeab59f763 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Fri, 8 Mar 2024 23:32:05 +0100
Subject: [PATCH 46/69] clk: rockchip: rk3588: drop RK3588_LINKED_CLK

With the proper GATE_LINK support, we no longer need to keep the
linked clocks always on. Thus it's time to drop the CLK_IS_CRITICAL
flag for them.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/clk/rockchip/clk-rk3588.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c
index 1bf84ac44e85..42579b6f74b4 100644
--- a/drivers/clk/rockchip/clk-rk3588.c
+++ b/drivers/clk/rockchip/clk-rk3588.c
@@ -12,9 +12,6 @@
 #include <dt-bindings/clock/rockchip,rk3588-cru.h>
 #include "clk.h"

-#define RK3588_LINKED_CLK		CLK_IS_CRITICAL
-
-
 #define RK3588_GRF_SOC_STATUS0		0x600
 #define RK3588_PHYREF_ALT_GATE		0xc38

@@ -1439,7 +1436,7 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 	COMPOSITE_NODIV(HCLK_NVM_ROOT,  "hclk_nvm_root", mux_200m_100m_50m_24m_p, 0,
 			RK3588_CLKSEL_CON(77), 0, 2, MFLAGS,
 			RK3588_CLKGATE_CON(31), 0, GFLAGS),
-	COMPOSITE(ACLK_NVM_ROOT, "aclk_nvm_root", gpll_cpll_p, RK3588_LINKED_CLK,
+	COMPOSITE(ACLK_NVM_ROOT, "aclk_nvm_root", gpll_cpll_p, 0,
 			RK3588_CLKSEL_CON(77), 7, 1, MFLAGS, 2, 5, DFLAGS,
 			RK3588_CLKGATE_CON(31), 1, GFLAGS),
 	GATE(ACLK_EMMC, "aclk_emmc", "aclk_nvm_root", 0,
@@ -1668,13 +1665,13 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 			RK3588_CLKGATE_CON(42), 9, GFLAGS),

 	/* vdpu */
-	COMPOSITE(ACLK_VDPU_ROOT, "aclk_vdpu_root", gpll_cpll_aupll_p, RK3588_LINKED_CLK,
+	COMPOSITE(ACLK_VDPU_ROOT, "aclk_vdpu_root", gpll_cpll_aupll_p, 0,
 			RK3588_CLKSEL_CON(98), 5, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3588_CLKGATE_CON(44), 0, GFLAGS),
 	COMPOSITE_NODIV(ACLK_VDPU_LOW_ROOT, "aclk_vdpu_low_root", mux_400m_200m_100m_24m_p, 0,
 			RK3588_CLKSEL_CON(98), 7, 2, MFLAGS,
 			RK3588_CLKGATE_CON(44), 1, GFLAGS),
-	COMPOSITE_NODIV(HCLK_VDPU_ROOT, "hclk_vdpu_root", mux_200m_100m_50m_24m_p, RK3588_LINKED_CLK,
+	COMPOSITE_NODIV(HCLK_VDPU_ROOT, "hclk_vdpu_root", mux_200m_100m_50m_24m_p, 0,
 			RK3588_CLKSEL_CON(98), 9, 2, MFLAGS,
 			RK3588_CLKGATE_CON(44), 2, GFLAGS),
 	COMPOSITE(ACLK_JPEG_DECODER_ROOT, "aclk_jpeg_decoder_root", gpll_cpll_aupll_spll_p, 0,
@@ -1725,9 +1722,9 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 	COMPOSITE(ACLK_RKVENC0_ROOT, "aclk_rkvenc0_root", gpll_cpll_npll_p, 0,
 			RK3588_CLKSEL_CON(102), 7, 2, MFLAGS, 2, 5, DFLAGS,
 			RK3588_CLKGATE_CON(47), 1, GFLAGS),
-	GATE(HCLK_RKVENC0, "hclk_rkvenc0", "hclk_rkvenc0_root", RK3588_LINKED_CLK,
+	GATE(HCLK_RKVENC0, "hclk_rkvenc0", "hclk_rkvenc0_root", 0,
 			RK3588_CLKGATE_CON(47), 4, GFLAGS),
-	GATE(ACLK_RKVENC0, "aclk_rkvenc0", "aclk_rkvenc0_root", RK3588_LINKED_CLK,
+	GATE(ACLK_RKVENC0, "aclk_rkvenc0", "aclk_rkvenc0_root", 0,
 			RK3588_CLKGATE_CON(47), 5, GFLAGS),
 	COMPOSITE(CLK_RKVENC0_CORE, "clk_rkvenc0_core", gpll_cpll_aupll_npll_p, 0,
 			RK3588_CLKSEL_CON(102), 14, 2, MFLAGS, 9, 5, DFLAGS,
@@ -1737,10 +1734,10 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 			RK3588_CLKGATE_CON(48), 6, GFLAGS),

 	/* vi */
-	COMPOSITE(ACLK_VI_ROOT, "aclk_vi_root", gpll_cpll_npll_aupll_spll_p, RK3588_LINKED_CLK,
+	COMPOSITE(ACLK_VI_ROOT, "aclk_vi_root", gpll_cpll_npll_aupll_spll_p, 0,
 			RK3588_CLKSEL_CON(106), 5, 3, MFLAGS, 0, 5, DFLAGS,
 			RK3588_CLKGATE_CON(49), 0, GFLAGS),
-	COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_200m_100m_50m_24m_p, RK3588_LINKED_CLK,
+	COMPOSITE_NODIV(HCLK_VI_ROOT, "hclk_vi_root", mux_200m_100m_50m_24m_p, 0,
 			RK3588_CLKSEL_CON(106), 8, 2, MFLAGS,
 			RK3588_CLKGATE_CON(49), 1, GFLAGS),
 	COMPOSITE_NODIV(PCLK_VI_ROOT, "pclk_vi_root", mux_100m_50m_24m_p, 0,
@@ -1910,10 +1907,10 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 	COMPOSITE(ACLK_VOP_ROOT, "aclk_vop_root", gpll_cpll_dmyaupll_npll_spll_p, 0,
 			RK3588_CLKSEL_CON(110), 5, 3, MFLAGS, 0, 5, DFLAGS,
 			RK3588_CLKGATE_CON(52), 0, GFLAGS),
-	COMPOSITE_NODIV(ACLK_VOP_LOW_ROOT, "aclk_vop_low_root", mux_400m_200m_100m_24m_p, RK3588_LINKED_CLK,
+	COMPOSITE_NODIV(ACLK_VOP_LOW_ROOT, "aclk_vop_low_root", mux_400m_200m_100m_24m_p, 0,
 			RK3588_CLKSEL_CON(110), 8, 2, MFLAGS,
 			RK3588_CLKGATE_CON(52), 1, GFLAGS),
-	COMPOSITE_NODIV(HCLK_VOP_ROOT, "hclk_vop_root", mux_200m_100m_50m_24m_p, RK3588_LINKED_CLK,
+	COMPOSITE_NODIV(HCLK_VOP_ROOT, "hclk_vop_root", mux_200m_100m_50m_24m_p, 0,
 			RK3588_CLKSEL_CON(110), 10, 2, MFLAGS,
 			RK3588_CLKGATE_CON(52), 2, GFLAGS),
 	COMPOSITE_NODIV(PCLK_VOP_ROOT, "pclk_vop_root", mux_100m_50m_24m_p, 0,
@@ -2416,7 +2413,7 @@ static struct rockchip_clk_branch rk3588_early_clk_branches[] __initdata = {
 static struct rockchip_clk_branch rk3588_clk_branches[] = {
 	GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", ACLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 6, GFLAGS),
 	GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", HCLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 8, GFLAGS),
-	GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS),
+	GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, 0, RK3588_CLKGATE_CON(31), 2, GFLAGS),
 	GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 2, GFLAGS),
 	GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", HCLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 3, GFLAGS),
 	GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 7, GFLAGS),
@@ -2428,9 +2425,9 @@ static struct rockchip_clk_branch rk3588_clk_branches[] = {
 	GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 4, GFLAGS),
 	GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 5, GFLAGS),
 	GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", ACLK_VOP_LOW_ROOT, 0, RK3588_CLKGATE_CON(55), 9, GFLAGS),
-	GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", HCLK_VOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS),
+	GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", HCLK_VOP_ROOT, 0, RK3588_CLKGATE_CON(55), 5, GFLAGS),
 	GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(59), 6, GFLAGS),
-	GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", HCLK_VO1USB_TOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS),
+	GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", HCLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(59), 9, GFLAGS),
 	GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 1, GFLAGS),
 	GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 4, GFLAGS),
 	GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", HCLK_NVM, 0, RK3588_CLKGATE_CON(75), 1, GFLAGS),
--
2.42.0


From 32cb0cc5972ff58f831e4759254aa5aec7af9341 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 2 Jan 2024 09:35:43 +0100
Subject: [PATCH 47/69] arm64: dts: rockchip: rk3588-evb1: add bluetooth rfkill

Add rfkill support for bluetooth. Bluetooth support itself is still
missing, but this ensures bluetooth can be powered off properly.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index 2490be5a5e18..7d988e134df3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -120,6 +120,15 @@ backlight: backlight {
 		pwms = <&pwm2 0 25000 0>;
 	};

+	bluetooth-rfkill {
+		compatible = "rfkill-gpio";
+		label = "rfkill-bluetooth";
+		radio-type = "bluetooth";
+		shutdown-gpios = <&gpio3 RK_PA6 GPIO_ACTIVE_LOW>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&bluetooth_pwren>;
+	};
+
 	wlan-rfkill {
 		compatible = "rfkill-gpio";
 		label = "rfkill-pcie-wlan";
@@ -478,6 +487,12 @@ speaker_amplifier_en: speaker-amplifier-en {
 		};
 	};

+	bluetooth {
+		bluetooth_pwren: bluetooth-pwren {
+			rockchip,pins = <3 RK_PA6 RK_FUNC_GPIO &pcfg_pull_up>;
+		};
+	};
+
 	rtl8111 {
 		rtl8111_isolate: rtl8111-isolate {
 			rockchip,pins = <1 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>;
--
2.42.0


From ec7dac8d718a12ad94635cafb0cf1f636e6c0378 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 2 Jan 2024 09:39:11 +0100
Subject: [PATCH 48/69] arm64: dts: rockchip: rk3588-evb1: improve PCIe
 ethernet pin muxing

Also describe clkreq and wake signals in the PCIe pinmux used
by the onboard LAN card.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index 7d988e134df3..f76748f2136c 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -456,7 +456,7 @@ &pcie2x1l0 {
 &pcie2x1l1 {
 	reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>;
 	pinctrl-names = "default";
-	pinctrl-0 = <&pcie2_1_rst>, <&rtl8111_isolate>;
+	pinctrl-0 = <&pcie2_1_rst>, <&pcie2_1_wake>, <&pcie2_1_clkreq>, <&rtl8111_isolate>;
 	status = "okay";
 };

@@ -528,6 +528,14 @@ pcie2_0_clkreq: pcie2-0-clkreq {
 		pcie2_1_rst: pcie2-1-rst {
 			rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>;
 		};
+
+		pcie2_1_wake: pcie2-1-wake {
+			rockchip,pins = <4 RK_PA1 4 &pcfg_pull_none>;
+		};
+
+		pcie2_1_clkreq: pcie2-1-clkreq {
+			rockchip,pins = <4 RK_PA0 4 &pcfg_pull_none>;
+		};
 	};

 	pcie3 {
--
2.42.0


From 9d361fbc9d4d191cdb74b6af85a02aa9dec046a1 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Fri, 3 Nov 2023 19:58:02 +0200
Subject: [PATCH 49/69] [WIP] drm/rockchip: vop2: Improve display modes
 handling on rk3588

The initial vop2 support for rk3588 in mainline is not able to handle
all display modes supported by connected displays, e.g.
2560x1440-75.00Hz, 2048x1152-60.00Hz, 1024x768-60.00Hz.

Additionally, it doesn't cope with non-integer refresh rates like 59.94,
29.97, 23.98, etc.

Improve HDMI0 clocking in order to support the additional display modes.

Fixes: 5a028e8f062f ("drm/rockchip: vop2: Add support for rk3588")
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 drivers/gpu/drm/rockchip/rockchip_drm_vop2.c | 553 ++++++++++++++++++-
 1 file changed, 552 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index fdd768bbd487..c1361ceaec41 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -5,6 +5,8 @@
  */
 #include <linux/bitfield.h>
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
 #include <linux/component.h>
 #include <linux/delay.h>
 #include <linux/iopoll.h>
@@ -212,6 +214,10 @@ struct vop2 {
 	struct clk *hclk;
 	struct clk *aclk;
 	struct clk *pclk;
+	// [CC:] hack to support additional display modes
+	struct clk *hdmi0_phy_pll;
+	/* list_head of internal clk */
+	struct list_head clk_list_head;

 	/* optional internal rgb encoder */
 	struct rockchip_rgb *rgb;
@@ -220,6 +226,19 @@ struct vop2 {
 	struct vop2_win win[];
 };

+struct vop2_clk {
+	struct vop2 *vop2;
+	struct list_head list;
+	unsigned long rate;
+	struct clk_hw hw;
+	struct clk_divider div;
+	int div_val;
+	u8 parent_index;
+};
+
+#define to_vop2_clk(_hw) container_of(_hw, struct vop2_clk, hw)
+#define VOP2_MAX_DCLK_RATE	600000 /* kHz */
+
 #define vop2_output_if_is_hdmi(x)	((x) == ROCKCHIP_VOP2_EP_HDMI0 || \
 					 (x) == ROCKCHIP_VOP2_EP_HDMI1)

@@ -1474,9 +1493,30 @@ static bool vop2_crtc_mode_fixup(struct drm_crtc *crtc,
 				 const struct drm_display_mode *mode,
 				 struct drm_display_mode *adj_mode)
 {
+	struct vop2_video_port *vp = to_vop2_video_port(crtc);
+	struct drm_connector *connector;
+	struct drm_connector_list_iter conn_iter;
+	struct drm_crtc_state *new_crtc_state = container_of(mode, struct drm_crtc_state, mode);
 	drm_mode_set_crtcinfo(adj_mode, CRTC_INTERLACE_HALVE_V |
 					CRTC_STEREO_DOUBLE);

+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		adj_mode->crtc_clock *= 2;
+
+	drm_connector_list_iter_begin(crtc->dev, &conn_iter);
+	drm_for_each_connector_iter(connector, &conn_iter) {
+		if ((new_crtc_state->connector_mask & drm_connector_mask(connector)) &&
+		    ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
+		     (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))) {
+			drm_connector_list_iter_end(&conn_iter);
+			return true;
+		}
+	}
+	drm_connector_list_iter_end(&conn_iter);
+
+	if (adj_mode->crtc_clock <= VOP2_MAX_DCLK_RATE)
+		adj_mode->crtc_clock = DIV_ROUND_UP(clk_round_rate(vp->dclk,
+						    adj_mode->crtc_clock * 1000), 1000);
 	return true;
 }

@@ -1661,6 +1701,31 @@ static unsigned long rk3588_calc_dclk(unsigned long child_clk, unsigned long max
 		return 0;
 }

+static struct vop2_clk *vop2_clk_get(struct vop2 *vop2, const char *name);
+
+static int vop2_cru_set_rate(struct vop2_clk *if_pixclk, struct vop2_clk *if_dclk)
+{
+	int ret = 0;
+
+	if (if_pixclk) {
+		ret =  clk_set_rate(if_pixclk->hw.clk, if_pixclk->rate);
+		if (ret < 0) {
+			DRM_DEV_ERROR(if_pixclk->vop2->dev, "set %s to %ld failed: %d\n",
+				      clk_hw_get_name(&if_pixclk->hw), if_pixclk->rate, ret);
+			return ret;
+		}
+	}
+
+	if (if_dclk) {
+		ret = clk_set_rate(if_dclk->hw.clk, if_dclk->rate);
+		if (ret < 0)
+			DRM_DEV_ERROR(if_dclk->vop2->dev, "set %s to %ld failed %d\n",
+				      clk_hw_get_name(&if_dclk->hw), if_dclk->rate, ret);
+	}
+
+	return ret;
+}
+
 /*
  * 4 pixclk/cycle on rk3588
  * RGB/eDP/HDMI: if_pixclk >= dclk_core
@@ -1684,6 +1749,72 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id,
 	int K = 1;

 	if (vop2_output_if_is_hdmi(id)) {
+		if (vop2->data->soc_id == 3588 && id == ROCKCHIP_VOP2_EP_HDMI0 &&
+		    vop2->hdmi0_phy_pll) {
+			const char *clk_src_name = "hdmi_edp0_clk_src";
+			const char *clk_parent_name = "dclk";
+			const char *pixclk_name = "hdmi_edp0_pixclk";
+			const char *dclk_name = "hdmi_edp0_dclk";
+			struct vop2_clk *if_clk_src, *if_clk_parent, *if_pixclk, *if_dclk, *dclk, *dclk_core, *dclk_out;
+			char clk_name[32];
+			int ret;
+
+			if_clk_src = vop2_clk_get(vop2, clk_src_name);
+			snprintf(clk_name, sizeof(clk_name), "%s%d", clk_parent_name, vp->id);
+			if_clk_parent = vop2_clk_get(vop2, clk_name);
+			if_pixclk = vop2_clk_get(vop2, pixclk_name);
+			if_dclk = vop2_clk_get(vop2, dclk_name);
+			if (!if_pixclk || !if_clk_parent) {
+				DRM_DEV_ERROR(vop2->dev, "failed to get connector interface clk\n");
+				return -ENODEV;
+			}
+
+			ret = clk_set_parent(if_clk_src->hw.clk, if_clk_parent->hw.clk);
+			if (ret < 0) {
+				DRM_DEV_ERROR(vop2->dev, "failed to set parent(%s) for %s: %d\n",
+					      __clk_get_name(if_clk_parent->hw.clk),
+					      __clk_get_name(if_clk_src->hw.clk), ret);
+				return ret;
+			}
+
+			if (output_mode == ROCKCHIP_OUT_MODE_YUV420)
+				K = 2;
+
+			if_pixclk->rate = (dclk_core_rate << 1) / K;
+			if_dclk->rate = dclk_core_rate / K;
+
+			snprintf(clk_name, sizeof(clk_name), "dclk_core%d", vp->id);
+			dclk_core = vop2_clk_get(vop2, clk_name);
+
+			snprintf(clk_name, sizeof(clk_name), "dclk_out%d", vp->id);
+			dclk_out = vop2_clk_get(vop2, clk_name);
+
+			snprintf(clk_name, sizeof(clk_name), "dclk%d", vp->id);
+			dclk = vop2_clk_get(vop2, clk_name);
+			if (v_pixclk <= (VOP2_MAX_DCLK_RATE * 1000)) {
+				if (output_mode == ROCKCHIP_OUT_MODE_YUV420)
+					v_pixclk = v_pixclk >> 1;
+			} else {
+				v_pixclk = v_pixclk >> 2;
+			}
+			clk_set_rate(dclk->hw.clk, v_pixclk);
+
+			if (dclk_core_rate > if_pixclk->rate) {
+				clk_set_rate(dclk_core->hw.clk, dclk_core_rate);
+				ret = vop2_cru_set_rate(if_pixclk, if_dclk);
+			} else {
+				ret = vop2_cru_set_rate(if_pixclk, if_dclk);
+				clk_set_rate(dclk_core->hw.clk, dclk_core_rate);
+			}
+
+			*dclk_core_div = dclk_core->div_val;
+			*dclk_out_div = dclk_out->div_val;
+			*if_pixclk_div = if_pixclk->div_val;
+			*if_dclk_div = if_dclk->div_val;
+
+			return dclk->rate;
+		}
+
 		/*
 		 * K = 2: dclk_core = if_pixclk_rate > if_dclk_rate
 		 * K = 1: dclk_core = hdmie_edp_dclk > if_pixclk_rate
@@ -1915,6 +2046,22 @@ static int us_to_vertical_line(struct drm_display_mode *mode, int us)
 	return us * mode->clock / mode->htotal / 1000;
 }

+// [CC:] rework virtual clock
+static struct vop2_clk *vop2_clk_get(struct vop2 *vop2, const char *name)
+{
+	struct vop2_clk *clk, *n;
+
+	if (!name)
+		return NULL;
+
+	list_for_each_entry_safe(clk, n, &vop2->clk_list_head, list) {
+		if (!strcmp(clk_hw_get_name(&clk->hw), name))
+			return clk;
+	}
+
+	return NULL;
+}
+
 static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 				    struct drm_atomic_state *state)
 {
@@ -1942,6 +2089,8 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,
 	u32 val, polflags;
 	int ret;
 	struct drm_encoder *encoder;
+	char clk_name[32];
+	struct vop2_clk *dclk;

 	drm_dbg(vop2->drm, "Update mode to %dx%d%s%d, type: %d for vp%d\n",
 		hdisplay, vdisplay, mode->flags & DRM_MODE_FLAG_INTERLACE ? "i" : "p",
@@ -2042,11 +2191,38 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc,

 	if (mode->flags & DRM_MODE_FLAG_DBLCLK) {
 		dsp_ctrl |= RK3568_VP_DSP_CTRL__CORE_DCLK_DIV;
-		clock *= 2;
+		// [CC:] done via mode_fixup
+		// clock *= 2;
 	}

 	vop2_vp_write(vp, RK3568_VP_MIPI_CTRL, 0);

+	snprintf(clk_name, sizeof(clk_name), "dclk%d", vp->id);
+	dclk = vop2_clk_get(vop2, clk_name);
+	if (dclk) {
+		/*
+		 * use HDMI_PHY_PLL as dclk source under 4K@60 if it is available,
+		 * otherwise use system cru as dclk source.
+		 */
+		drm_for_each_encoder_mask(encoder, crtc->dev, crtc_state->encoder_mask) {
+			struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder);
+
+			// [CC:] Using PHY PLL to handle all display modes
+			if (rkencoder->crtc_endpoint_id == ROCKCHIP_VOP2_EP_HDMI0) {
+				clk_get_rate(vop2->hdmi0_phy_pll);
+
+				if (mode->crtc_clock <= VOP2_MAX_DCLK_RATE) {
+					ret = clk_set_parent(vp->dclk, vop2->hdmi0_phy_pll);
+					if (ret < 0)
+						DRM_WARN("failed to set clock parent for %s\n",
+							 __clk_get_name(vp->dclk));
+				}
+
+				clock = dclk->rate;
+			}
+		}
+	}
+
 	clk_set_rate(vp->dclk, clock);

 	vop2_post_config(crtc);
@@ -2502,7 +2678,43 @@ static void vop2_crtc_atomic_flush(struct drm_crtc *crtc,
 	spin_unlock_irq(&crtc->dev->event_lock);
 }

+static enum drm_mode_status
+vop2_crtc_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode)
+{
+	struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state);
+	struct vop2_video_port *vp = to_vop2_video_port(crtc);
+	struct vop2 *vop2 = vp->vop2;
+	const struct vop2_data *vop2_data = vop2->data;
+	const struct vop2_video_port_data *vp_data = &vop2_data->vp[vp->id];
+	int request_clock = mode->clock;
+	int clock;
+
+	if (mode->hdisplay > vp_data->max_output.width)
+		return MODE_BAD_HVALUE;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		request_clock *= 2;
+
+	if (request_clock <= VOP2_MAX_DCLK_RATE) {
+		clock = request_clock;
+	} else {
+		request_clock = request_clock >> 2;
+		clock = clk_round_rate(vp->dclk, request_clock * 1000) / 1000;
+	}
+
+	/*
+	 * Hdmi or DisplayPort request a Accurate clock.
+	 */
+	if (vcstate->output_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    vcstate->output_type == DRM_MODE_CONNECTOR_DisplayPort)
+		if (clock != request_clock)
+			return MODE_CLOCK_RANGE;
+
+	return MODE_OK;
+}
+
 static const struct drm_crtc_helper_funcs vop2_crtc_helper_funcs = {
+	.mode_valid = vop2_crtc_mode_valid,
 	.mode_fixup = vop2_crtc_mode_fixup,
 	.atomic_check = vop2_crtc_atomic_check,
 	.atomic_begin = vop2_crtc_atomic_begin,
@@ -3072,6 +3284,336 @@ static const struct regmap_config vop2_regmap_config = {
 	.cache_type	= REGCACHE_MAPLE,
 };

+/*
+ * BEGIN virtual clock
+ */
+#define PLL_RATE_MIN	30000000
+
+#define cru_dbg(format, ...) do {				\
+		if (cru_debug)					\
+			pr_info("%s: " format, __func__, ## __VA_ARGS__); \
+	} while (0)
+
+#define PNAME(x) static const char *const x[]
+
+enum vop_clk_branch_type {
+	branch_mux,
+	branch_divider,
+	branch_factor,
+	branch_virtual,
+};
+
+#define VIR(cname)						\
+	{							\
+		.branch_type	= branch_virtual,		\
+		.name		= cname,			\
+	}
+
+
+#define MUX(cname, pnames, f)			\
+	{							\
+		.branch_type	= branch_mux,			\
+		.name		= cname,			\
+		.parent_names	= pnames,			\
+		.num_parents	= ARRAY_SIZE(pnames),		\
+		.flags		= f,				\
+	}
+
+#define FACTOR(cname, pname,  f)			\
+	{							\
+		.branch_type	= branch_factor,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+	}
+
+#define DIV(cname, pname, f, w)			\
+	{							\
+		.branch_type	= branch_divider,		\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.flags		= f,				\
+		.div_width	= w,				\
+	}
+
+struct vop2_clk_branch {
+	enum vop_clk_branch_type	branch_type;
+	const char			*name;
+	const char			*const *parent_names;
+	u8				num_parents;
+	unsigned long			flags;
+	u8				div_shift;
+	u8				div_width;
+	u8				div_flags;
+};
+
+PNAME(mux_port0_dclk_src_p)		= { "dclk0", "dclk1" };
+PNAME(mux_port2_dclk_src_p)		= { "dclk2", "dclk1" };
+PNAME(mux_dp_pixclk_p)			= { "dclk_out0", "dclk_out1", "dclk_out2" };
+PNAME(mux_hdmi_edp_clk_src_p)		= { "dclk0", "dclk1", "dclk2" };
+PNAME(mux_mipi_clk_src_p)		= { "dclk_out1", "dclk_out2", "dclk_out3" };
+PNAME(mux_dsc_8k_clk_src_p)		= { "dclk0", "dclk1", "dclk2", "dclk3" };
+PNAME(mux_dsc_4k_clk_src_p)		= { "dclk0", "dclk1", "dclk2", "dclk3" };
+
+/*
+ * We only use this clk driver calculate the div
+ * of dclk_core/dclk_out/if_pixclk/if_dclk and
+ * the rate of the dclk from the soc.
+ *
+ * We don't touch the cru in the vop here, as
+ * these registers has special read andy write
+ * limits.
+ */
+static struct vop2_clk_branch rk3588_vop_clk_branches[] = {
+	VIR("dclk0"),
+	VIR("dclk1"),
+	VIR("dclk2"),
+	VIR("dclk3"),
+
+	MUX("port0_dclk_src", mux_port0_dclk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("dclk_core0", "port0_dclk_src", CLK_SET_RATE_PARENT, 2),
+	DIV("dclk_out0", "port0_dclk_src", CLK_SET_RATE_PARENT, 2),
+
+	FACTOR("port1_dclk_src", "dclk1", CLK_SET_RATE_PARENT),
+	DIV("dclk_core1", "port1_dclk_src", CLK_SET_RATE_PARENT, 2),
+	DIV("dclk_out1", "port1_dclk_src", CLK_SET_RATE_PARENT, 2),
+
+	MUX("port2_dclk_src", mux_port2_dclk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("dclk_core2", "port2_dclk_src", CLK_SET_RATE_PARENT, 2),
+	DIV("dclk_out2", "port2_dclk_src", CLK_SET_RATE_PARENT, 2),
+
+	FACTOR("port3_dclk_src", "dclk3", CLK_SET_RATE_PARENT),
+	DIV("dclk_core3", "port3_dclk_src", CLK_SET_RATE_PARENT, 2),
+	DIV("dclk_out3", "port3_dclk_src", CLK_SET_RATE_PARENT, 2),
+
+	MUX("dp0_pixclk", mux_dp_pixclk_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	MUX("dp1_pixclk", mux_dp_pixclk_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+
+	MUX("hdmi_edp0_clk_src", mux_hdmi_edp_clk_src_p,
+	    CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("hdmi_edp0_dclk", "hdmi_edp0_clk_src", 0, 2),
+	DIV("hdmi_edp0_pixclk", "hdmi_edp0_clk_src", CLK_SET_RATE_PARENT, 1),
+
+	MUX("hdmi_edp1_clk_src", mux_hdmi_edp_clk_src_p,
+	    CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("hdmi_edp1_dclk", "hdmi_edp1_clk_src", 0, 2),
+	DIV("hdmi_edp1_pixclk", "hdmi_edp1_clk_src", CLK_SET_RATE_PARENT, 1),
+
+	MUX("mipi0_clk_src", mux_mipi_clk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("mipi0_pixclk", "mipi0_clk_src", CLK_SET_RATE_PARENT, 2),
+
+	MUX("mipi1_clk_src", mux_mipi_clk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("mipi1_pixclk", "mipi1_clk_src", CLK_SET_RATE_PARENT, 2),
+
+	FACTOR("rgb_pixclk", "port3_dclk_src", CLK_SET_RATE_PARENT),
+
+	MUX("dsc_8k_txp_clk_src", mux_dsc_8k_clk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("dsc_8k_txp_clk", "dsc_8k_txp_clk_src", 0, 2),
+	DIV("dsc_8k_pxl_clk", "dsc_8k_txp_clk_src", 0, 2),
+	DIV("dsc_8k_cds_clk", "dsc_8k_txp_clk_src", 0, 2),
+
+	MUX("dsc_4k_txp_clk_src", mux_dsc_4k_clk_src_p, CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT),
+	DIV("dsc_4k_txp_clk", "dsc_4k_txp_clk_src", 0, 2),
+	DIV("dsc_4k_pxl_clk", "dsc_4k_txp_clk_src", 0, 2),
+	DIV("dsc_4k_cds_clk", "dsc_4k_txp_clk_src", 0, 2),
+};
+
+static unsigned long clk_virtual_recalc_rate(struct clk_hw *hw,
+		unsigned long parent_rate)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+
+	return (unsigned long)vop2_clk->rate;
+}
+
+static long clk_virtual_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+
+	vop2_clk->rate = rate;
+
+	return rate;
+}
+
+static int clk_virtual_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long parent_rate)
+{
+	return 0;
+}
+
+const struct clk_ops clk_virtual_ops = {
+	.round_rate = clk_virtual_round_rate,
+	.set_rate = clk_virtual_set_rate,
+	.recalc_rate = clk_virtual_recalc_rate,
+};
+
+static u8 vop2_mux_get_parent(struct clk_hw *hw)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+
+	// cru_dbg("%s index: %d\n", clk_hw_get_name(hw), vop2_clk->parent_index);
+	return vop2_clk->parent_index;
+}
+
+static int vop2_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+
+	vop2_clk->parent_index = index;
+
+	// cru_dbg("%s index: %d\n", clk_hw_get_name(hw), index);
+	return 0;
+}
+
+static int vop2_clk_mux_determine_rate(struct clk_hw *hw,
+			     struct clk_rate_request *req)
+{
+	// cru_dbg("%s  %ld(min: %ld max: %ld)\n",
+	//        clk_hw_get_name(hw), req->rate, req->min_rate, req->max_rate);
+	return __clk_mux_determine_rate(hw, req);
+}
+
+static const struct clk_ops vop2_mux_clk_ops = {
+	.get_parent = vop2_mux_get_parent,
+	.set_parent = vop2_mux_set_parent,
+	.determine_rate = vop2_clk_mux_determine_rate,
+};
+
+#define div_mask(width)	((1 << (width)) - 1)
+
+static int vop2_div_get_val(unsigned long rate, unsigned long parent_rate)
+{
+	unsigned int div, value;
+
+	div = DIV_ROUND_UP_ULL((u64)parent_rate, rate);
+
+	value = ilog2(div);
+
+	return value;
+}
+
+static unsigned long vop2_clk_div_recalc_rate(struct clk_hw *hw,
+					  unsigned long parent_rate)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+	unsigned long rate;
+	unsigned int div;
+
+	div =  1 << vop2_clk->div_val;
+	rate = parent_rate / div;
+
+	// cru_dbg("%s rate: %ld(prate: %ld)\n", clk_hw_get_name(hw), rate, parent_rate);
+	return rate;
+}
+
+static long vop2_clk_div_round_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long *prate)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+
+	if (clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) {
+		if (*prate < rate)
+			*prate = rate;
+		if ((*prate >> vop2_clk->div.width) > rate)
+			*prate = rate;
+
+		if ((*prate % rate))
+			*prate = rate;
+
+		/* SOC PLL can't output a too low pll freq */
+		if (*prate < PLL_RATE_MIN)
+			*prate = rate << vop2_clk->div.width;
+	}
+
+	// cru_dbg("%s rate: %ld(prate: %ld)\n", clk_hw_get_name(hw), rate, *prate);
+	return rate;
+}
+
+static int vop2_clk_div_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate)
+{
+	struct vop2_clk *vop2_clk = to_vop2_clk(hw);
+	int div_val;
+
+	div_val = vop2_div_get_val(rate, parent_rate);
+	vop2_clk->div_val = div_val;
+
+	// cru_dbg("%s prate: %ld rate: %ld div_val: %d\n",
+	//        clk_hw_get_name(hw), parent_rate, rate, div_val);
+	return 0;
+}
+
+static const struct clk_ops vop2_div_clk_ops = {
+	.recalc_rate = vop2_clk_div_recalc_rate,
+	.round_rate = vop2_clk_div_round_rate,
+	.set_rate = vop2_clk_div_set_rate,
+};
+
+static struct clk *vop2_clk_register(struct vop2 *vop2, struct vop2_clk_branch *branch)
+{
+	struct clk_init_data init = {};
+	struct vop2_clk *vop2_clk;
+	struct clk *clk;
+
+	vop2_clk = devm_kzalloc(vop2->dev, sizeof(*vop2_clk), GFP_KERNEL);
+	if (!vop2_clk)
+		return ERR_PTR(-ENOMEM);
+
+	vop2_clk->vop2 = vop2;
+	vop2_clk->hw.init = &init;
+	vop2_clk->div.shift = branch->div_shift;
+	vop2_clk->div.width = branch->div_width;
+
+	init.name = branch->name;
+	init.flags = branch->flags;
+	init.num_parents = branch->num_parents;
+	init.parent_names = branch->parent_names;
+	if (branch->branch_type == branch_divider) {
+		init.ops = &vop2_div_clk_ops;
+	} else if (branch->branch_type == branch_virtual) {
+		init.ops = &clk_virtual_ops;
+		init.num_parents = 0;
+		init.parent_names = NULL;
+	} else {
+		init.ops = &vop2_mux_clk_ops;
+	}
+
+	clk = devm_clk_register(vop2->dev, &vop2_clk->hw);
+	if (!IS_ERR(clk))
+		list_add_tail(&vop2_clk->list, &vop2->clk_list_head);
+	else
+		DRM_DEV_ERROR(vop2->dev, "Register %s failed\n", branch->name);
+
+	return clk;
+}
+
+static int vop2_clk_init(struct vop2 *vop2)
+{
+	struct vop2_clk_branch *branch = rk3588_vop_clk_branches;
+	unsigned int nr_clk = ARRAY_SIZE(rk3588_vop_clk_branches);
+	unsigned int idx;
+	struct vop2_clk *clk, *n;
+
+	INIT_LIST_HEAD(&vop2->clk_list_head);
+
+	if (vop2->data->soc_id < 3588 || vop2->hdmi0_phy_pll == NULL)
+		return 0;
+
+	list_for_each_entry_safe(clk, n, &vop2->clk_list_head, list) {
+		list_del(&clk->list);
+	}
+
+	for (idx = 0; idx < nr_clk; idx++, branch++)
+		vop2_clk_register(vop2, branch);
+
+	return 0;
+}
+/*
+ * END virtual clock
+ */
+
 static int vop2_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -3165,6 +3707,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(vop2->pclk);
 	}

+	vop2->hdmi0_phy_pll = devm_clk_get_optional(vop2->drm->dev, "hdmi0_phy_pll");
+	if (IS_ERR(vop2->hdmi0_phy_pll)) {
+		DRM_DEV_ERROR(vop2->dev, "failed to get hdmi0_phy_pll source\n");
+		return PTR_ERR(vop2->hdmi0_phy_pll);
+	}
+
 	vop2->irq = platform_get_irq(pdev, 0);
 	if (vop2->irq < 0) {
 		drm_err(vop2->drm, "cannot find irq for vop2\n");
@@ -3181,6 +3729,9 @@ static int vop2_bind(struct device *dev, struct device *master, void *data)
 	if (ret)
 		return ret;

+	// [CC:] rework virtual clock
+	vop2_clk_init(vop2);
+
 	ret = vop2_find_rgb_encoder(vop2);
 	if (ret >= 0) {
 		vop2->rgb = rockchip_rgb_init(dev, &vop2->vps[ret].crtc,
--
2.42.0


From 416924039dc4b4ff1f0426b4a31bf15404700ab8 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Mon, 5 Feb 2024 01:38:48 +0200
Subject: [PATCH 50/69] [WIP] phy: phy-rockchip-samsung-hdptx: Add FRL & EARC
 support

For upstreaming, this requires extending the standard PHY API to support
HDMI configuration options [1].

Currently, the bus_width PHY attribute is used to pass clock rate and
flags for 10-bit color depth, FRL and EARC. This is done by the HDMI
bridge driver via phy_set_bus_width().

[1]: https://lore.kernel.org/all/59d5595a24bbcca897e814440179fa2caf3dff38.1707040881.git.Sandor.yu@nxp.com/

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 434 +++++++++++++++++-
 1 file changed, 431 insertions(+), 3 deletions(-)

diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index 946c01210ac8..44acea3f86af 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -190,6 +190,12 @@
 #define LN3_TX_SER_RATE_SEL_HBR2	BIT(3)
 #define LN3_TX_SER_RATE_SEL_HBR3	BIT(2)

+#define HDMI20_MAX_RATE			600000000
+#define DATA_RATE_MASK			0xFFFFFFF
+#define COLOR_DEPTH_MASK		BIT(31)
+#define HDMI_MODE_MASK			BIT(30)
+#define HDMI_EARC_MASK			BIT(29)
+
 struct lcpll_config {
 	u32 bit_rate;
 	u8 lcvco_mode_en;
@@ -272,6 +278,25 @@ struct rk_hdptx_phy {
 	struct clk_bulk_data *clks;
 	int nr_clks;
 	struct reset_control_bulk_data rsts[RST_MAX];
+	bool earc_en;
+};
+
+static const struct lcpll_config lcpll_cfg[] = {
+	{ 48000000, 1, 0, 0, 0x7d, 0x7d, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2,
+	  0, 0x13, 0x18, 1, 0, 0x20, 0x0c, 1, 0, },
+	{ 40000000, 1, 1, 0, 0x68, 0x68, 1, 1, 0, 0, 0, 1, 1, 1, 1, 9, 0, 1, 1,
+	  0, 2, 3, 1, 0, 0x20, 0x0c, 1, 0, },
+	{ 32000000, 1, 1, 1, 0x6b, 0x6b, 1, 1, 0, 1, 2, 1, 1, 1, 1, 9, 1, 2, 1,
+	  0, 0x0d, 0x18, 1, 0, 0x20, 0x0c, 1, 1, },
+};
+
+static const struct ropll_config ropll_frl_cfg[] = {
+	{ 24000000, 0x19, 0x19, 1, 1, 0, 1, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
+	  0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+	{ 18000000, 0x7d, 0x7d, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
+	  0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
+	{ 9000000, 0x7d, 0x7d, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
+	  0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, },
 };

 static const struct ropll_config ropll_tmds_cfg[] = {
@@ -449,6 +474,73 @@ static const struct reg_sequence rk_hdtpx_tmds_cmn_init_seq[] = {
 	REG_SEQ0(CMN_REG(009b), 0x00),
 };

+static const struct reg_sequence rk_hdtpx_frl_cmn_init_seq[] = {
+	REG_SEQ0(CMN_REG(0011), 0x00),
+	REG_SEQ0(CMN_REG(0017), 0x00),
+	REG_SEQ0(CMN_REG(0026), 0x53),
+	REG_SEQ0(CMN_REG(0030), 0x00),
+	REG_SEQ0(CMN_REG(0031), 0x20),
+	REG_SEQ0(CMN_REG(0032), 0x30),
+	REG_SEQ0(CMN_REG(0033), 0x0b),
+	REG_SEQ0(CMN_REG(0034), 0x23),
+	REG_SEQ0(CMN_REG(0042), 0xb8),
+	REG_SEQ0(CMN_REG(004e), 0x14),
+	REG_SEQ0(CMN_REG(0074), 0x00),
+	REG_SEQ0(CMN_REG(0081), 0x09),
+	REG_SEQ0(CMN_REG(0086), 0x01),
+	REG_SEQ0(CMN_REG(0087), 0x0c),
+	REG_SEQ0(CMN_REG(009b), 0x10),
+};
+
+static const struct reg_sequence rk_hdtpx_frl_ropll_cmn_init_seq[] = {
+	REG_SEQ0(CMN_REG(0008), 0x00),
+	REG_SEQ0(CMN_REG(001e), 0x14),
+	REG_SEQ0(CMN_REG(0020), 0x00),
+	REG_SEQ0(CMN_REG(0021), 0x00),
+	REG_SEQ0(CMN_REG(0022), 0x11),
+	REG_SEQ0(CMN_REG(0023), 0x00),
+	REG_SEQ0(CMN_REG(0025), 0x00),
+	REG_SEQ0(CMN_REG(0027), 0x00),
+	REG_SEQ0(CMN_REG(0028), 0x00),
+	REG_SEQ0(CMN_REG(002a), 0x01),
+	REG_SEQ0(CMN_REG(002b), 0x00),
+	REG_SEQ0(CMN_REG(002c), 0x00),
+	REG_SEQ0(CMN_REG(002d), 0x00),
+	REG_SEQ0(CMN_REG(002e), 0x00),
+	REG_SEQ0(CMN_REG(002f), 0x04),
+	REG_SEQ0(CMN_REG(003d), 0x40),
+	REG_SEQ0(CMN_REG(005c), 0x25),
+	REG_SEQ0(CMN_REG(0089), 0x00),
+	REG_SEQ0(CMN_REG(0094), 0x00),
+	REG_SEQ0(CMN_REG(0097), 0x02),
+	REG_SEQ0(CMN_REG(0099), 0x04),
+};
+
+static const struct reg_sequence rk_hdtpx_frl_lcpll_cmn_init_seq[] = {
+	REG_SEQ0(CMN_REG(0025), 0x10),
+	REG_SEQ0(CMN_REG(0027), 0x01),
+	REG_SEQ0(CMN_REG(0028), 0x0d),
+	REG_SEQ0(CMN_REG(002e), 0x02),
+	REG_SEQ0(CMN_REG(002f), 0x0d),
+	REG_SEQ0(CMN_REG(003d), 0x00),
+	REG_SEQ0(CMN_REG(0051), 0x00),
+	REG_SEQ0(CMN_REG(0055), 0x00),
+	REG_SEQ0(CMN_REG(0059), 0x11),
+	REG_SEQ0(CMN_REG(005a), 0x03),
+	REG_SEQ0(CMN_REG(005c), 0x05),
+	REG_SEQ0(CMN_REG(005e), 0x07),
+	REG_SEQ0(CMN_REG(0060), 0x01),
+	REG_SEQ0(CMN_REG(0064), 0x07),
+	REG_SEQ0(CMN_REG(0065), 0x00),
+	REG_SEQ0(CMN_REG(0069), 0x00),
+	REG_SEQ0(CMN_REG(006c), 0x00),
+	REG_SEQ0(CMN_REG(0070), 0x01),
+	REG_SEQ0(CMN_REG(0089), 0x02),
+	REG_SEQ0(CMN_REG(0095), 0x00),
+	REG_SEQ0(CMN_REG(0097), 0x00),
+	REG_SEQ0(CMN_REG(0099), 0x00),
+};
+
 static const struct reg_sequence rk_hdtpx_common_sb_init_seq[] = {
 	REG_SEQ0(SB_REG(0114), 0x00),
 	REG_SEQ0(SB_REG(0115), 0x00),
@@ -472,6 +564,17 @@ static const struct reg_sequence rk_hdtpx_tmds_lntop_lowbr_seq[] = {
 	REG_SEQ0(LNTOP_REG(0205), 0x1f),
 };

+static const struct reg_sequence rk_hdtpx_frl_lntop_init_seq[] = {
+	REG_SEQ0(LNTOP_REG(0200), 0x04),
+	REG_SEQ0(LNTOP_REG(0201), 0x00),
+	REG_SEQ0(LNTOP_REG(0202), 0x00),
+	REG_SEQ0(LNTOP_REG(0203), 0xf0),
+	REG_SEQ0(LNTOP_REG(0204), 0xff),
+	REG_SEQ0(LNTOP_REG(0205), 0xff),
+	REG_SEQ0(LNTOP_REG(0206), 0x05),
+	REG_SEQ0(LNTOP_REG(0207), 0x0f),
+};
+
 static const struct reg_sequence rk_hdtpx_common_lane_init_seq[] = {
 	REG_SEQ0(LANE_REG(0303), 0x0c),
 	REG_SEQ0(LANE_REG(0307), 0x20),
@@ -550,6 +653,40 @@ static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = {
 	REG_SEQ0(LANE_REG(0606), 0x1c),
 };

+static const struct reg_sequence rk_hdtpx_frl_ropll_lane_init_seq[] = {
+	REG_SEQ0(LANE_REG(0312), 0x3c),
+	REG_SEQ0(LANE_REG(0412), 0x3c),
+	REG_SEQ0(LANE_REG(0512), 0x3c),
+	REG_SEQ0(LANE_REG(0612), 0x3c),
+};
+
+static const struct reg_sequence rk_hdtpx_frl_lcpll_lane_init_seq[] = {
+	REG_SEQ0(LANE_REG(0312), 0x3c),
+	REG_SEQ0(LANE_REG(0412), 0x3c),
+	REG_SEQ0(LANE_REG(0512), 0x3c),
+	REG_SEQ0(LANE_REG(0612), 0x3c),
+	REG_SEQ0(LANE_REG(0303), 0x2f),
+	REG_SEQ0(LANE_REG(0403), 0x2f),
+	REG_SEQ0(LANE_REG(0503), 0x2f),
+	REG_SEQ0(LANE_REG(0603), 0x2f),
+	REG_SEQ0(LANE_REG(0305), 0x03),
+	REG_SEQ0(LANE_REG(0405), 0x03),
+	REG_SEQ0(LANE_REG(0505), 0x03),
+	REG_SEQ0(LANE_REG(0605), 0x03),
+	REG_SEQ0(LANE_REG(0306), 0xfc),
+	REG_SEQ0(LANE_REG(0406), 0xfc),
+	REG_SEQ0(LANE_REG(0506), 0xfc),
+	REG_SEQ0(LANE_REG(0606), 0xfc),
+	REG_SEQ0(LANE_REG(0305), 0x4f),
+	REG_SEQ0(LANE_REG(0405), 0x4f),
+	REG_SEQ0(LANE_REG(0505), 0x4f),
+	REG_SEQ0(LANE_REG(0605), 0x4f),
+	REG_SEQ0(LANE_REG(0304), 0x14),
+	REG_SEQ0(LANE_REG(0404), 0x14),
+	REG_SEQ0(LANE_REG(0504), 0x14),
+	REG_SEQ0(LANE_REG(0604), 0x14),
+};
+
 static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -651,6 +788,47 @@ static int rk_hdptx_post_enable_pll(struct rk_hdptx_phy *hdptx)
 	return 0;
 }

+static int rk_hdptx_post_power_up(struct rk_hdptx_phy *hdptx)
+{
+	u32 val;
+	int ret;
+
+	val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 |
+	       HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN;
+	regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+
+	usleep_range(10, 15);
+	reset_control_deassert(hdptx->rsts[RST_INIT].rstc);
+
+	usleep_range(10, 15);
+	val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN;
+	regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
+
+	usleep_range(10, 15);
+	reset_control_deassert(hdptx->rsts[RST_CMN].rstc);
+
+	ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val,
+				       val & HDPTX_O_PLL_LOCK_DONE, 20, 400);
+	if (ret) {
+		dev_err(hdptx->dev, "Failed to get PHY PLL lock: %d\n", ret);
+		return ret;
+	}
+
+	usleep_range(20, 30);
+	reset_control_deassert(hdptx->rsts[RST_LANE].rstc);
+
+	ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val,
+				       val & HDPTX_O_PHY_RDY, 100, 5000);
+	if (ret) {
+		dev_err(hdptx->dev, "Failed to get PHY ready: %d\n", ret);
+		return ret;
+	}
+
+	dev_dbg(hdptx->dev, "PHY ready\n");
+
+	return 0;
+}
+
 static void rk_hdptx_phy_disable(struct rk_hdptx_phy *hdptx)
 {
 	u32 val;
@@ -680,6 +858,99 @@ static void rk_hdptx_phy_disable(struct rk_hdptx_phy *hdptx)
 	regmap_write(hdptx->grf, GRF_HDPTX_CON0, val);
 }

+static void rk_hdptx_earc_config(struct rk_hdptx_phy *hdptx)
+{
+	regmap_update_bits(hdptx->regmap, SB_REG(0113), SB_RX_RCAL_OPT_CODE_MASK,
+			   FIELD_PREP(SB_RX_RCAL_OPT_CODE_MASK, 1));
+	regmap_write(hdptx->regmap, SB_REG(011c), 0x04);
+	regmap_update_bits(hdptx->regmap, SB_REG(011b), SB_AFC_TOL_MASK,
+			   FIELD_PREP(SB_AFC_TOL_MASK, 3));
+	regmap_write(hdptx->regmap, SB_REG(0109), 0x05);
+
+	regmap_update_bits(hdptx->regmap, SB_REG(0120),
+			   SB_EARC_EN_MASK | SB_EARC_AFC_EN_MASK,
+			   FIELD_PREP(SB_EARC_EN_MASK, 1) |
+			   FIELD_PREP(SB_EARC_AFC_EN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(011b), SB_EARC_SIG_DET_BYPASS_MASK,
+			   FIELD_PREP(SB_EARC_SIG_DET_BYPASS_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(011f),
+			   SB_PWM_AFC_CTRL_MASK | SB_RCAL_RSTN_MASK,
+			   FIELD_PREP(SB_PWM_AFC_CTRL_MASK, 0xc) |
+			   FIELD_PREP(SB_RCAL_RSTN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0115), SB_READY_DELAY_TIME_MASK,
+			   FIELD_PREP(SB_READY_DELAY_TIME_MASK, 2));
+	regmap_update_bits(hdptx->regmap, SB_REG(0113), SB_RX_RTERM_CTRL_MASK,
+			   FIELD_PREP(SB_RX_RTERM_CTRL_MASK, 3));
+	regmap_update_bits(hdptx->regmap, SB_REG(0102), ANA_SB_RXTERM_OFFSP_MASK,
+			   FIELD_PREP(ANA_SB_RXTERM_OFFSP_MASK, 3));
+	regmap_update_bits(hdptx->regmap, SB_REG(0103), ANA_SB_RXTERM_OFFSN_MASK,
+			   FIELD_PREP(ANA_SB_RXTERM_OFFSN_MASK, 3));
+
+	regmap_write(hdptx->regmap, SB_REG(011a), 0x03);
+	regmap_write(hdptx->regmap, SB_REG(0118), 0x0a);
+	regmap_write(hdptx->regmap, SB_REG(011e), 0x6a);
+	regmap_write(hdptx->regmap, SB_REG(011d), 0x67);
+
+	regmap_update_bits(hdptx->regmap, SB_REG(0117), FAST_PULSE_TIME_MASK,
+			   FIELD_PREP(FAST_PULSE_TIME_MASK, 4));
+	regmap_update_bits(hdptx->regmap, SB_REG(0114),
+			   SB_TG_SB_EN_DELAY_TIME_MASK | SB_TG_RXTERM_EN_DELAY_TIME_MASK,
+			   FIELD_PREP(SB_TG_SB_EN_DELAY_TIME_MASK, 2) |
+			   FIELD_PREP(SB_TG_RXTERM_EN_DELAY_TIME_MASK, 2));
+	regmap_update_bits(hdptx->regmap, SB_REG(0105), ANA_SB_TX_HLVL_PROG_MASK,
+			   FIELD_PREP(ANA_SB_TX_HLVL_PROG_MASK, 7));
+	regmap_update_bits(hdptx->regmap, SB_REG(0106), ANA_SB_TX_LLVL_PROG_MASK,
+			   FIELD_PREP(ANA_SB_TX_LLVL_PROG_MASK, 7));
+	regmap_update_bits(hdptx->regmap, SB_REG(010f), ANA_SB_VREG_GAIN_CTRL_MASK,
+			   FIELD_PREP(ANA_SB_VREG_GAIN_CTRL_MASK, 0));
+	regmap_update_bits(hdptx->regmap, SB_REG(0110), ANA_SB_VREG_REF_SEL_MASK,
+			   FIELD_PREP(ANA_SB_VREG_REF_SEL_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0115), SB_TG_OSC_EN_DELAY_TIME_MASK,
+			   FIELD_PREP(SB_TG_OSC_EN_DELAY_TIME_MASK, 2));
+	regmap_update_bits(hdptx->regmap, SB_REG(0116), AFC_RSTN_DELAY_TIME_MASK,
+			   FIELD_PREP(AFC_RSTN_DELAY_TIME_MASK, 2));
+	regmap_update_bits(hdptx->regmap, SB_REG(0109), ANA_SB_DMRX_AFC_DIV_RATIO_MASK,
+			   FIELD_PREP(ANA_SB_DMRX_AFC_DIV_RATIO_MASK, 5));
+	regmap_update_bits(hdptx->regmap, SB_REG(0103), OVRD_SB_RX_RESCAL_DONE_MASK,
+			   FIELD_PREP(OVRD_SB_RX_RESCAL_DONE_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0104), OVRD_SB_EN_MASK,
+			   FIELD_PREP(OVRD_SB_EN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0102), OVRD_SB_RXTERM_EN_MASK,
+			   FIELD_PREP(OVRD_SB_RXTERM_EN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0105), OVRD_SB_EARC_CMDC_EN_MASK,
+			   FIELD_PREP(OVRD_SB_EARC_CMDC_EN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(010f),
+			   OVRD_SB_VREG_EN_MASK | OVRD_SB_VREG_LPF_BYPASS_MASK,
+			   FIELD_PREP(OVRD_SB_VREG_EN_MASK, 1) |
+			   FIELD_PREP(OVRD_SB_VREG_LPF_BYPASS_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(0123), OVRD_SB_READY_MASK,
+			   FIELD_PREP(OVRD_SB_READY_MASK, 1));
+
+	usleep_range(1000, 1100);
+	regmap_update_bits(hdptx->regmap, SB_REG(0103), SB_RX_RESCAL_DONE_MASK,
+			   FIELD_PREP(SB_RX_RESCAL_DONE_MASK, 1));
+	usleep_range(50, 60);
+	regmap_update_bits(hdptx->regmap, SB_REG(0104), SB_EN_MASK,
+			   FIELD_PREP(SB_EN_MASK, 1));
+	usleep_range(50, 60);
+	regmap_update_bits(hdptx->regmap, SB_REG(0102), SB_RXTERM_EN_MASK,
+			   FIELD_PREP(SB_RXTERM_EN_MASK, 1));
+	usleep_range(50, 60);
+	regmap_update_bits(hdptx->regmap, SB_REG(0105), SB_EARC_CMDC_EN_MASK,
+			   FIELD_PREP(SB_EARC_CMDC_EN_MASK, 1));
+	regmap_update_bits(hdptx->regmap, SB_REG(010f), SB_VREG_EN_MASK,
+			   FIELD_PREP(SB_VREG_EN_MASK, 1));
+	usleep_range(50, 60);
+	regmap_update_bits(hdptx->regmap, SB_REG(010f), OVRD_SB_VREG_LPF_BYPASS_MASK,
+			   FIELD_PREP(OVRD_SB_VREG_LPF_BYPASS_MASK, 1));
+	usleep_range(250, 300);
+	regmap_update_bits(hdptx->regmap, SB_REG(010f), OVRD_SB_VREG_LPF_BYPASS_MASK,
+			   FIELD_PREP(OVRD_SB_VREG_LPF_BYPASS_MASK, 0));
+	usleep_range(100, 120);
+	regmap_update_bits(hdptx->regmap, SB_REG(0123), SB_READY_MASK,
+			   FIELD_PREP(SB_READY_MASK, 1));
+}
+
 static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate,
 				      struct ropll_config *cfg)
 {
@@ -755,9 +1026,13 @@ static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate,
 static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
 					  unsigned int rate)
 {
+	int i, bus_width = phy_get_bus_width(hdptx->phy);
+	u8 color_depth = (bus_width & COLOR_DEPTH_MASK) ? 1 : 0;
 	const struct ropll_config *cfg = NULL;
 	struct ropll_config rc = {0};
-	int i;
+
+	if (color_depth)
+		rate = rate * 10 / 8;

 	for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
 		if (rate == ropll_tmds_cfg[i].bit_rate) {
@@ -813,6 +1088,9 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
 	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK,
 			   FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv));

+	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_SEL_MASK,
+			   FIELD_PREP(PLL_PCG_CLK_SEL_MASK, color_depth));
+
 	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN,
 			   PLL_PCG_CLK_EN);

@@ -853,9 +1131,146 @@ static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx,
 	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq);
 	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lane_init_seq);

+	if (hdptx->earc_en)
+		rk_hdptx_earc_config(hdptx);
+
 	return rk_hdptx_post_enable_lane(hdptx);
 }

+static int rk_hdptx_ropll_frl_mode_config(struct rk_hdptx_phy *hdptx,
+					  u32 bus_width)
+{
+	u32 bit_rate = bus_width & DATA_RATE_MASK;
+	u8 color_depth = (bus_width & COLOR_DEPTH_MASK) ? 1 : 0;
+	const struct ropll_config *cfg = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ropll_frl_cfg); i++)
+		if (bit_rate == ropll_frl_cfg[i].bit_rate) {
+			cfg = &ropll_frl_cfg[i];
+			break;
+		}
+
+	if (!cfg) {
+		dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__);
+		return -EINVAL;
+	}
+
+	rk_hdptx_pre_power_up(hdptx);
+
+	reset_control_assert(hdptx->rsts[RST_ROPLL].rstc);
+	usleep_range(10, 20);
+	reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc);
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_cmn_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_ropll_cmn_init_seq);
+
+	regmap_write(hdptx->regmap, CMN_REG(0051), cfg->pms_mdiv);
+	regmap_write(hdptx->regmap, CMN_REG(0055), cfg->pms_mdiv_afc);
+	regmap_write(hdptx->regmap, CMN_REG(0059),
+		     (cfg->pms_pdiv << 4) | cfg->pms_refdiv);
+	regmap_write(hdptx->regmap, CMN_REG(005a), cfg->pms_sdiv << 4);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK,
+			   FIELD_PREP(ROPLL_SDM_EN_MASK, cfg->sdm_en));
+	if (!cfg->sdm_en)
+		regmap_update_bits(hdptx->regmap, CMN_REG(005e), 0xf, 0);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(0064), ROPLL_SDM_NUM_SIGN_RBR_MASK,
+			   FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, cfg->sdm_num_sign));
+
+	regmap_write(hdptx->regmap, CMN_REG(0060), cfg->sdm_deno);
+	regmap_write(hdptx->regmap, CMN_REG(0065), cfg->sdm_num);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK,
+			   FIELD_PREP(ROPLL_SDC_N_RBR_MASK, cfg->sdc_n));
+
+	regmap_write(hdptx->regmap, CMN_REG(006c), cfg->sdc_num);
+	regmap_write(hdptx->regmap, CMN_REG(0070), cfg->sdc_deno);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK,
+			   FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv));
+	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_SEL_MASK,
+			   FIELD_PREP(PLL_PCG_CLK_SEL_MASK, color_depth));
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_lntop_init_seq);
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_ropll_lane_init_seq);
+
+	if (hdptx->earc_en)
+		rk_hdptx_earc_config(hdptx);
+
+	return rk_hdptx_post_power_up(hdptx);
+}
+
+static int rk_hdptx_lcpll_frl_mode_config(struct rk_hdptx_phy *hdptx,
+					  u32 bus_width)
+{
+	u32 bit_rate = bus_width & DATA_RATE_MASK;
+	u8 color_depth = (bus_width & COLOR_DEPTH_MASK) ? 1 : 0;
+	const struct lcpll_config *cfg = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(lcpll_cfg); i++)
+		if (bit_rate == lcpll_cfg[i].bit_rate) {
+			cfg = &lcpll_cfg[i];
+			break;
+		}
+
+	if (!cfg) {
+		dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__);
+		return -EINVAL;
+	}
+
+	rk_hdptx_pre_power_up(hdptx);
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_cmn_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_lcpll_cmn_init_seq);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(0008),
+			   LCPLL_EN_MASK | LCPLL_LCVCO_MODE_EN_MASK,
+			   FIELD_PREP(LCPLL_EN_MASK, 1) |
+			   FIELD_PREP(LCPLL_LCVCO_MODE_EN_MASK, cfg->lcvco_mode_en));
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(001e),
+			   LCPLL_PI_EN_MASK | LCPLL_100M_CLK_EN_MASK,
+			   FIELD_PREP(LCPLL_PI_EN_MASK, cfg->pi_en) |
+			   FIELD_PREP(LCPLL_100M_CLK_EN_MASK, cfg->clk_en_100m));
+
+	regmap_write(hdptx->regmap, CMN_REG(0020), cfg->pms_mdiv);
+	regmap_write(hdptx->regmap, CMN_REG(0021), cfg->pms_mdiv_afc);
+	regmap_write(hdptx->regmap, CMN_REG(0022),
+		     (cfg->pms_pdiv << 4) | cfg->pms_refdiv);
+	regmap_write(hdptx->regmap, CMN_REG(0023),
+		     (cfg->pms_sdiv << 4) | cfg->pms_sdiv);
+	regmap_write(hdptx->regmap, CMN_REG(002a), cfg->sdm_deno);
+	regmap_write(hdptx->regmap, CMN_REG(002b), cfg->sdm_num_sign);
+	regmap_write(hdptx->regmap, CMN_REG(002c), cfg->sdm_num);
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(002d), LCPLL_SDC_N_MASK,
+			   FIELD_PREP(LCPLL_SDC_N_MASK, cfg->sdc_n));
+
+	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK,
+			   FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv));
+	regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_SEL_MASK,
+			   FIELD_PREP(PLL_PCG_CLK_SEL_MASK, color_depth));
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_lntop_init_seq);
+
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq);
+	rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_frl_lcpll_lane_init_seq);
+
+	if (hdptx->earc_en)
+		rk_hdptx_earc_config(hdptx);
+
+	return rk_hdptx_post_power_up(hdptx);
+}
+
 static int rk_hdptx_phy_power_on(struct phy *phy)
 {
 	struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
@@ -865,7 +1280,7 @@ static int rk_hdptx_phy_power_on(struct phy *phy)
 	 * from the HDMI bridge driver until phy_configure_opts_hdmi
 	 * becomes available in the PHY API.
 	 */
-	unsigned int rate = bus_width & 0xfffffff;
+	unsigned int rate = bus_width & DATA_RATE_MASK;

 	dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n",
 		__func__, bus_width, rate);
@@ -876,7 +1291,20 @@ static int rk_hdptx_phy_power_on(struct phy *phy)
 		return ret;
 	}

-	ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate);
+	if (bus_width & HDMI_EARC_MASK)
+		hdptx->earc_en = true;
+	else
+		hdptx->earc_en = false;
+
+	if (bus_width & HDMI_MODE_MASK) {
+		if (rate > 24000000)
+			ret = rk_hdptx_lcpll_frl_mode_config(hdptx, bus_width);
+		else
+			ret = rk_hdptx_ropll_frl_mode_config(hdptx, bus_width);
+	} else {
+		ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate);
+	}
+
 	if (ret)
 		pm_runtime_put(hdptx->dev);

--
2.42.0


From bf89f0fbcd824b19ac7ed9a91309b186a86c7343 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Mon, 19 Feb 2024 21:53:24 +0200
Subject: [PATCH 51/69] [WIP] dt-bindings: phy: rockchip,rk3588-hdptx-phy: Add
 #clock-cells

The PHY can be used as a clock provider on RK3588, hence add the missing
'#clock-cells' property.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml     | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml
index 54e822c715f3..84fe59dbcf48 100644
--- a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml
@@ -27,6 +27,9 @@ properties:
       - const: ref
       - const: apb

+  "#clock-cells":
+    const: 0
+
   "#phy-cells":
     const: 0

--
2.42.0


From 55314349c4db551c490d9f084d0626020cd4a2d7 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Tue, 16 Jan 2024 19:27:40 +0200
Subject: [PATCH 52/69] [WIP] phy: phy-rockchip-samsung-hdptx: Add clock
 provider

The HDMI PHY PLL can be used as an alternative dclk source to SoC CRU.
It provides more accurate clock rates required to properly support
various display modes, e.g. those relying on non-integer refresh rates.

Also note this only works for HDMI 2.0 or bellow, e.g. cannot be used to
support HDMI 2.1 4K@120Hz mode.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 148 +++++++++++++++++-
 1 file changed, 143 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
index 44acea3f86af..a3ac4f3835bc 100644
--- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c
@@ -8,6 +8,7 @@
  */
 #include <linux/bitfield.h>
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/delay.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
@@ -279,6 +280,12 @@ struct rk_hdptx_phy {
 	int nr_clks;
 	struct reset_control_bulk_data rsts[RST_MAX];
 	bool earc_en;
+
+	/* clk provider */
+	struct clk_hw hw;
+	unsigned long rate;
+	int id;
+	int count;
 };

 static const struct lcpll_config lcpll_cfg[] = {
@@ -1031,6 +1038,8 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx,
 	const struct ropll_config *cfg = NULL;
 	struct ropll_config rc = {0};

+	hdptx->rate = rate * 100;
+
 	if (color_depth)
 		rate = rate * 10 / 8;

@@ -1315,11 +1324,13 @@ static int rk_hdptx_phy_power_off(struct phy *phy)
 {
 	struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy);
 	u32 val;
-	int ret;
+	int ret = 0;

-	ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
-	if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE))
-		rk_hdptx_phy_disable(hdptx);
+	if (hdptx->count == 0) {
+		ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
+		if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE))
+			rk_hdptx_phy_disable(hdptx);
+	}

 	pm_runtime_put(hdptx->dev);

@@ -1332,6 +1343,129 @@ static const struct phy_ops rk_hdptx_phy_ops = {
 	.owner	   = THIS_MODULE,
 };

+static struct rk_hdptx_phy *to_rk_hdptx_phy(struct clk_hw *hw)
+{
+	return container_of(hw, struct rk_hdptx_phy, hw);
+}
+
+static int rk_hdptx_phy_clk_prepare(struct clk_hw *hw)
+{
+	struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
+	int ret;
+
+	ret = pm_runtime_resume_and_get(hdptx->dev);
+	if (ret) {
+		dev_err(hdptx->dev, "Failed to resume phy clk: %d\n", ret);
+		return ret;
+	}
+
+	if (!hdptx->count && hdptx->rate) {
+		ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, hdptx->rate / 100);
+		if (ret < 0) {
+			dev_err(hdptx->dev, "Failed to init PHY PLL: %d\n", ret);
+			pm_runtime_put(hdptx->dev);
+			return ret;
+		}
+	}
+
+	hdptx->count++;
+
+	return 0;
+}
+
+static void rk_hdptx_phy_clk_unprepare(struct clk_hw *hw)
+{
+	struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
+
+	if (hdptx->count == 1) {
+		u32 val;
+		int ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
+		if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE))
+			rk_hdptx_phy_disable(hdptx);
+	}
+
+	hdptx->count--;
+	pm_runtime_put(hdptx->dev);
+}
+
+static unsigned long rk_hdptx_phy_clk_recalc_rate(struct clk_hw *hw,
+						  unsigned long parent_rate)
+{
+	struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
+
+	return hdptx->rate;
+}
+
+static long rk_hdptx_phy_clk_round_rate(struct clk_hw *hw, unsigned long rate,
+					unsigned long *parent_rate)
+{
+	const struct ropll_config *cfg = NULL;
+	u32 bit_rate = rate / 100;
+	int i;
+
+	if (rate > HDMI20_MAX_RATE)
+		return rate;
+
+	for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++)
+		if (bit_rate == ropll_tmds_cfg[i].bit_rate) {
+			cfg = &ropll_tmds_cfg[i];
+			break;
+		}
+
+	if (!cfg && !rk_hdptx_phy_clk_pll_calc(bit_rate, NULL))
+		return -EINVAL;
+
+	return rate;
+}
+
+static int rk_hdptx_phy_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long parent_rate)
+{
+	struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw);
+	u32 val;
+	int ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val);
+	if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE))
+		rk_hdptx_phy_disable(hdptx);
+
+	return rk_hdptx_ropll_tmds_cmn_config(hdptx, rate / 100);
+}
+
+static const struct clk_ops hdptx_phy_clk_ops = {
+	.prepare = rk_hdptx_phy_clk_prepare,
+	.unprepare = rk_hdptx_phy_clk_unprepare,
+	.recalc_rate = rk_hdptx_phy_clk_recalc_rate,
+	.round_rate = rk_hdptx_phy_clk_round_rate,
+	.set_rate = rk_hdptx_phy_clk_set_rate,
+};
+
+static int rk_hdptx_phy_clk_register(struct rk_hdptx_phy *hdptx)
+{
+	struct device *dev = hdptx->dev;
+	const char *name, *pname;
+	struct clk *refclk;
+	int ret;
+
+	refclk = devm_clk_get(dev, "ref");
+	if (IS_ERR(refclk))
+		return dev_err_probe(dev, PTR_ERR(refclk),
+				     "Failed to get ref clock\n");
+
+	pname = __clk_get_name(refclk);
+	name = hdptx->id ? "clk_hdmiphy_pixel1" : "clk_hdmiphy_pixel0";
+	hdptx->hw.init = CLK_HW_INIT(name, pname, &hdptx_phy_clk_ops,
+				     CLK_GET_RATE_NOCACHE);
+
+	ret = devm_clk_hw_register(dev, &hdptx->hw);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to register clock\n");
+
+	ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, &hdptx->hw);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to register clk provider\n");
+	return 0;
+}
+
 static int rk_hdptx_phy_runtime_suspend(struct device *dev)
 {
 	struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev);
@@ -1367,6 +1501,10 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev)

 	hdptx->dev = dev;

+	hdptx->id = of_alias_get_id(dev->of_node, "hdptxphy");
+	if (hdptx->id < 0)
+		hdptx->id = 0;
+
 	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs))
 		return dev_err_probe(dev, PTR_ERR(regs),
@@ -1426,7 +1564,7 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev)
 	reset_control_deassert(hdptx->rsts[RST_CMN].rstc);
 	reset_control_deassert(hdptx->rsts[RST_INIT].rstc);

-	return 0;
+	return rk_hdptx_phy_clk_register(hdptx);
 }

 static const struct dev_pm_ops rk_hdptx_phy_pm_ops = {
--
2.42.0


From dbe364bb8edc768a404e7ba423cf2ceaaa265a19 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Wed, 1 Nov 2023 18:50:38 +0200
Subject: [PATCH 53/69] [WIP] drm/bridge: synopsys: Add initial support for DW
 HDMI QP TX Controller

Co-developed-by: Algea Cao <algea.cao@rock-chips.com>
Signed-off-by: Algea Cao <algea.cao@rock-chips.com>
Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 drivers/gpu/drm/bridge/synopsys/Makefile     |    2 +-
 drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c | 2401 +++++++++++++++
 drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h |  831 ++++++
 drivers/gpu/drm/bridge/synopsys/dw-hdmi.c    |   89 +
 drivers/gpu/drm/bridge/synopsys/dw-hdmi.h    |    4 +
 drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c  | 2740 +++++++++++++++++-
 include/drm/bridge/dw_hdmi.h                 |  101 +
 7 files changed, 6102 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile
index ce715562e9e5..8354e4879f70 100644
--- a/drivers/gpu/drm/bridge/synopsys/Makefile
+++ b/drivers/gpu/drm/bridge/synopsys/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o
+obj-$(CONFIG_DRM_DW_HDMI) += dw-hdmi.o dw-hdmi-qp.o
 obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o
 obj-$(CONFIG_DRM_DW_HDMI_GP_AUDIO) += dw-hdmi-gp-audio.o
 obj-$(CONFIG_DRM_DW_HDMI_I2S_AUDIO) += dw-hdmi-i2s-audio.o
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c
new file mode 100644
index 000000000000..8817ef9a9de9
--- /dev/null
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c
@@ -0,0 +1,2401 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) Rockchip Electronics Co.Ltd
+ * Author:
+ *      Algea Cao <algea.cao@rock-chips.com>
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/extcon-provider.h>
+#include <linux/extcon.h>
+#include <linux/hdmi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/regmap.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/display/drm_dsc.h>
+#include <drm/display/drm_hdmi_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_encoder_slave.h>
+#include <drm/drm_of.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/display/drm_scdc_helper.h>
+#include <drm/bridge/dw_hdmi.h>
+
+#include <uapi/linux/media-bus-format.h>
+#include <uapi/linux/videodev2.h>
+
+#include "dw-hdmi-qp.h"
+
+#define DDC_CI_ADDR		0x37
+#define DDC_SEGMENT_ADDR	0x30
+
+#define HDMI_EDID_LEN		512
+
+/* DW-HDMI Controller >= 0x200a are at least compliant with SCDC version 1 */
+#define SCDC_MIN_SOURCE_VERSION	0x1
+
+#define HDMI14_MAX_TMDSCLK	340000000
+#define HDMI20_MAX_TMDSCLK_KHZ	600000
+
+static const unsigned int dw_hdmi_cable[] = {
+	EXTCON_DISP_HDMI,
+	EXTCON_NONE,
+};
+
+static const struct drm_display_mode dw_hdmi_default_modes[] = {
+	/* 16 - 1920x1080@60Hz 16:9 */
+	{ DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2008,
+		   2052, 2200, 0, 1080, 1084, 1089, 1125, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 2 - 720x480@60Hz 4:3 */
+	{ DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 27000, 720, 736,
+		   798, 858, 0, 480, 489, 495, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
+	/* 4 - 1280x720@60Hz 16:9 */
+	{ DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1390,
+		   1430, 1650, 0, 720, 725, 730, 750, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 31 - 1920x1080@50Hz 16:9 */
+	{ DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 148500, 1920, 2448,
+		   2492, 2640, 0, 1080, 1084, 1089, 1125, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 19 - 1280x720@50Hz 16:9 */
+	{ DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74250, 1280, 1720,
+		   1760, 1980, 0, 720, 725, 730, 750, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
+	/* 17 - 720x576@50Hz 4:3 */
+	{ DRM_MODE("720x576", DRM_MODE_TYPE_DRIVER, 27000, 720, 732,
+		   796, 864, 0, 576, 581, 586, 625, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
+	/* 2 - 720x480@60Hz 4:3 */
+	{ DRM_MODE("720x480", DRM_MODE_TYPE_DRIVER, 27000, 720, 736,
+		   798, 858, 0, 480, 489, 495, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
+	  .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
+};
+
+enum frl_mask {
+	FRL_3GBPS_3LANE = 1,
+	FRL_6GBPS_3LANE,
+	FRL_6GBPS_4LANE,
+	FRL_8GBPS_4LANE,
+	FRL_10GBPS_4LANE,
+	FRL_12GBPS_4LANE,
+};
+
+struct hdmi_vmode_qp {
+	bool mdataenablepolarity;
+
+	unsigned int previous_pixelclock;
+	unsigned long mpixelclock;
+	unsigned int mpixelrepetitioninput;
+	unsigned int mpixelrepetitionoutput;
+	unsigned long previous_tmdsclock;
+	unsigned int mtmdsclock;
+};
+
+struct hdmi_qp_data_info {
+	unsigned int enc_in_bus_format;
+	unsigned int enc_out_bus_format;
+	unsigned int enc_in_encoding;
+	unsigned int enc_out_encoding;
+	unsigned int quant_range;
+	unsigned int pix_repet_factor;
+	struct hdmi_vmode_qp video_mode;
+	bool update;
+};
+
+struct dw_hdmi_qp_i2c {
+	struct i2c_adapter	adap;
+
+	struct mutex		lock;	/* used to serialize data transfers */
+	struct completion	cmp;
+	u32			stat;
+
+	u8			slave_reg;
+	bool			is_regaddr;
+	bool			is_segment;
+
+	unsigned int		scl_high_ns;
+	unsigned int		scl_low_ns;
+};
+
+struct dw_hdmi_qp {
+	struct drm_connector connector;
+	struct drm_bridge bridge;
+	struct platform_device *hdcp_dev;
+	struct platform_device *audio;
+	struct platform_device *cec;
+	struct device *dev;
+	struct dw_hdmi_qp_i2c *i2c;
+
+	struct hdmi_qp_data_info hdmi_data;
+	const struct dw_hdmi_plat_data *plat_data;
+
+	int vic;
+	int main_irq;
+	int avp_irq;
+	int earc_irq;
+
+	u8 edid[HDMI_EDID_LEN];
+
+	struct {
+		const struct dw_hdmi_qp_phy_ops *ops;
+		const char *name;
+		void *data;
+		bool enabled;
+	} phy;
+
+	struct drm_display_mode previous_mode;
+
+	struct i2c_adapter *ddc;
+	void __iomem *regs;
+	bool sink_is_hdmi;
+
+	struct mutex mutex;		/* for state below and previous_mode */
+	//[CC:] curr_conn should be removed
+	struct drm_connector *curr_conn;/* current connector (only valid when !disabled) */
+	enum drm_connector_force force;	/* mutex-protected force state */
+	bool disabled;			/* DRM has disabled our bridge */
+	bool bridge_is_on;		/* indicates the bridge is on */
+	bool rxsense;			/* rxsense state */
+	u8 phy_mask;			/* desired phy int mask settings */
+	u8 mc_clkdis;			/* clock disable register */
+
+	u32 scdc_intr;
+	u32 flt_intr;
+	//[CC:] remove earc
+	u32 earc_intr;
+
+	struct dentry *debugfs_dir;
+	bool scramble_low_rates;
+
+	struct extcon_dev *extcon;
+
+	struct regmap *regm;
+
+	bool initialized;		/* hdmi is enabled before bind */
+	struct completion flt_cmp;
+	struct completion earc_cmp;
+
+	hdmi_codec_plugged_cb plugged_cb;
+	struct device *codec_dev;
+	enum drm_connector_status last_connector_result;
+};
+
+static inline void hdmi_writel(struct dw_hdmi_qp *hdmi, u32 val, int offset)
+{
+	regmap_write(hdmi->regm, offset, val);
+}
+
+static inline u32 hdmi_readl(struct dw_hdmi_qp *hdmi, int offset)
+{
+	unsigned int val = 0;
+
+	regmap_read(hdmi->regm, offset, &val);
+
+	return val;
+}
+
+static void handle_plugged_change(struct dw_hdmi_qp *hdmi, bool plugged)
+{
+	if (hdmi->plugged_cb && hdmi->codec_dev)
+		hdmi->plugged_cb(hdmi->codec_dev, plugged);
+}
+
+static void hdmi_modb(struct dw_hdmi_qp *hdmi, u32 data, u32 mask, u32 reg)
+{
+	regmap_update_bits(hdmi->regm, reg, mask, data);
+}
+
+static bool hdmi_bus_fmt_is_rgb(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv444(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv422(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv420(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static int hdmi_bus_fmt_color_depth(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+		return 8;
+
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		return 10;
+
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+		return 12;
+
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		return 16;
+
+	default:
+		return 0;
+	}
+}
+
+static void dw_hdmi_i2c_init(struct dw_hdmi_qp *hdmi)
+{
+	/* Software reset */
+	hdmi_writel(hdmi, 0x01, I2CM_CONTROL0);
+
+	hdmi_writel(hdmi, 0x085c085c, I2CM_FM_SCL_CONFIG0);
+
+	hdmi_modb(hdmi, 0, I2CM_FM_EN, I2CM_INTERFACE_CONTROL0);
+
+	/* Clear DONE and ERROR interrupts */
+	hdmi_writel(hdmi, I2CM_OP_DONE_CLEAR | I2CM_NACK_RCVD_CLEAR,
+		    MAINUNIT_1_INT_CLEAR);
+}
+
+static int dw_hdmi_i2c_read(struct dw_hdmi_qp *hdmi,
+			    unsigned char *buf, unsigned int length)
+{
+	struct dw_hdmi_qp_i2c *i2c = hdmi->i2c;
+	int stat;
+
+	if (!i2c->is_regaddr) {
+		dev_dbg(hdmi->dev, "set read register address to 0\n");
+		i2c->slave_reg = 0x00;
+		i2c->is_regaddr = true;
+	}
+
+	while (length--) {
+		reinit_completion(&i2c->cmp);
+
+		hdmi_modb(hdmi, i2c->slave_reg++ << 12, I2CM_ADDR,
+			  I2CM_INTERFACE_CONTROL0);
+
+		hdmi_modb(hdmi, I2CM_FM_READ, I2CM_WR_MASK,
+			  I2CM_INTERFACE_CONTROL0);
+
+		stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
+		if (!stat) {
+			dev_err(hdmi->dev, "i2c read time out!\n");
+			hdmi_writel(hdmi, 0x01, I2CM_CONTROL0);
+			return -EAGAIN;
+		}
+
+		/* Check for error condition on the bus */
+		if (i2c->stat & I2CM_NACK_RCVD_IRQ) {
+			dev_err(hdmi->dev, "i2c read err!\n");
+			hdmi_writel(hdmi, 0x01, I2CM_CONTROL0);
+			return -EIO;
+		}
+
+		*buf++ = hdmi_readl(hdmi, I2CM_INTERFACE_RDDATA_0_3) & 0xff;
+		hdmi_modb(hdmi, 0, I2CM_WR_MASK, I2CM_INTERFACE_CONTROL0);
+	}
+	i2c->is_segment = false;
+
+	return 0;
+}
+
+static int dw_hdmi_i2c_write(struct dw_hdmi_qp *hdmi,
+			     unsigned char *buf, unsigned int length)
+{
+	struct dw_hdmi_qp_i2c *i2c = hdmi->i2c;
+	int stat;
+
+	if (!i2c->is_regaddr) {
+		/* Use the first write byte as register address */
+		i2c->slave_reg = buf[0];
+		length--;
+		buf++;
+		i2c->is_regaddr = true;
+	}
+
+	while (length--) {
+		reinit_completion(&i2c->cmp);
+
+		hdmi_writel(hdmi, *buf++, I2CM_INTERFACE_WRDATA_0_3);
+		hdmi_modb(hdmi, i2c->slave_reg++ << 12, I2CM_ADDR,
+			  I2CM_INTERFACE_CONTROL0);
+		hdmi_modb(hdmi, I2CM_FM_WRITE, I2CM_WR_MASK,
+			  I2CM_INTERFACE_CONTROL0);
+
+		stat = wait_for_completion_timeout(&i2c->cmp, HZ / 10);
+		if (!stat) {
+			dev_err(hdmi->dev, "i2c write time out!\n");
+			hdmi_writel(hdmi, 0x01, I2CM_CONTROL0);
+			return -EAGAIN;
+		}
+
+		/* Check for error condition on the bus */
+		if (i2c->stat & I2CM_NACK_RCVD_IRQ) {
+			dev_err(hdmi->dev, "i2c write nack!\n");
+			hdmi_writel(hdmi, 0x01, I2CM_CONTROL0);
+			return -EIO;
+		}
+		hdmi_modb(hdmi, 0, I2CM_WR_MASK, I2CM_INTERFACE_CONTROL0);
+	}
+
+	return 0;
+}
+
+static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap,
+			    struct i2c_msg *msgs, int num)
+{
+	struct dw_hdmi_qp *hdmi = i2c_get_adapdata(adap);
+	struct dw_hdmi_qp_i2c *i2c = hdmi->i2c;
+	u8 addr = msgs[0].addr;
+	int i, ret = 0;
+
+	if (addr == DDC_CI_ADDR)
+		/*
+		 * The internal I2C controller does not support the multi-byte
+		 * read and write operations needed for DDC/CI.
+		 * TOFIX: Blacklist the DDC/CI address until we filter out
+		 * unsupported I2C operations.
+		 */
+		return -EOPNOTSUPP;
+
+	for (i = 0; i < num; i++) {
+		if (msgs[i].len == 0) {
+			dev_err(hdmi->dev,
+				"unsupported transfer %d/%d, no data\n",
+				i + 1, num);
+			return -EOPNOTSUPP;
+		}
+	}
+
+	mutex_lock(&i2c->lock);
+
+	/* Unmute DONE and ERROR interrupts */
+	hdmi_modb(hdmi, I2CM_NACK_RCVD_MASK_N | I2CM_OP_DONE_MASK_N,
+		  I2CM_NACK_RCVD_MASK_N | I2CM_OP_DONE_MASK_N,
+		  MAINUNIT_1_INT_MASK_N);
+
+	/* Set slave device address taken from the first I2C message */
+	if (addr == DDC_SEGMENT_ADDR && msgs[0].len == 1)
+		addr = DDC_ADDR;
+
+	hdmi_modb(hdmi, addr << 5, I2CM_SLVADDR, I2CM_INTERFACE_CONTROL0);
+
+	/* Set slave device register address on transfer */
+	i2c->is_regaddr = false;
+
+	/* Set segment pointer for I2C extended read mode operation */
+	i2c->is_segment = false;
+
+	for (i = 0; i < num; i++) {
+		if (msgs[i].addr == DDC_SEGMENT_ADDR && msgs[i].len == 1) {
+			i2c->is_segment = true;
+			hdmi_modb(hdmi, DDC_SEGMENT_ADDR, I2CM_SEG_ADDR,
+				  I2CM_INTERFACE_CONTROL1);
+			hdmi_modb(hdmi, *msgs[i].buf, I2CM_SEG_PTR,
+				  I2CM_INTERFACE_CONTROL1);
+		} else {
+			if (msgs[i].flags & I2C_M_RD)
+				ret = dw_hdmi_i2c_read(hdmi, msgs[i].buf,
+						       msgs[i].len);
+			else
+				ret = dw_hdmi_i2c_write(hdmi, msgs[i].buf,
+							msgs[i].len);
+		}
+		if (ret < 0)
+			break;
+	}
+
+	if (!ret)
+		ret = num;
+
+	/* Mute DONE and ERROR interrupts */
+	hdmi_modb(hdmi, 0, I2CM_OP_DONE_MASK_N | I2CM_NACK_RCVD_MASK_N,
+		  MAINUNIT_1_INT_MASK_N);
+
+	mutex_unlock(&i2c->lock);
+
+	return ret;
+}
+
+static u32 dw_hdmi_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm dw_hdmi_algorithm = {
+	.master_xfer	= dw_hdmi_i2c_xfer,
+	.functionality	= dw_hdmi_i2c_func,
+};
+
+static struct i2c_adapter *dw_hdmi_i2c_adapter(struct dw_hdmi_qp *hdmi)
+{
+	struct i2c_adapter *adap;
+	struct dw_hdmi_qp_i2c *i2c;
+	int ret;
+
+	i2c = devm_kzalloc(hdmi->dev, sizeof(*i2c), GFP_KERNEL);
+	if (!i2c)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&i2c->lock);
+	init_completion(&i2c->cmp);
+
+	adap = &i2c->adap;
+	adap->owner = THIS_MODULE;
+	adap->dev.parent = hdmi->dev;
+	adap->algo = &dw_hdmi_algorithm;
+	strscpy(adap->name, "ddc", sizeof(adap->name));
+	i2c_set_adapdata(adap, hdmi);
+
+	ret = i2c_add_adapter(adap);
+	if (ret) {
+		dev_warn(hdmi->dev, "cannot add %s I2C adapter\n", adap->name);
+		devm_kfree(hdmi->dev, i2c);
+		return ERR_PTR(ret);
+	}
+
+	hdmi->i2c = i2c;
+
+	dev_info(hdmi->dev, "registered %s I2C bus driver\n", adap->name);
+
+	return adap;
+}
+
+#define HDMI_PHY_EARC_MASK	BIT(29)
+
+/* -----------------------------------------------------------------------------
+ * HDMI TX Setup
+ */
+
+static void hdmi_infoframe_set_checksum(u8 *ptr, int size)
+{
+	u8 csum = 0;
+	int i;
+
+	ptr[3] = 0;
+	/* compute checksum */
+	for (i = 0; i < size; i++)
+		csum += ptr[i];
+
+	ptr[3] = 256 - csum;
+}
+
+static void hdmi_config_AVI(struct dw_hdmi_qp *hdmi,
+			    const struct drm_connector *connector,
+			    const struct drm_display_mode *mode)
+{
+	struct hdmi_avi_infoframe frame;
+	u32 val, i, j;
+	u8 buff[17];
+	enum hdmi_quantization_range rgb_quant_range =
+		hdmi->hdmi_data.quant_range;
+
+	/* Initialise info frame from DRM mode */
+	drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
+
+	/*
+	 * Ignore monitor selectable quantization, use quantization set
+	 * by the user
+	 */
+	drm_hdmi_avi_infoframe_quant_range(&frame, connector, mode, rgb_quant_range);
+	if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format))
+		frame.colorspace = HDMI_COLORSPACE_YUV444;
+	else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format))
+		frame.colorspace = HDMI_COLORSPACE_YUV422;
+	else if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format))
+		frame.colorspace = HDMI_COLORSPACE_YUV420;
+	else
+		frame.colorspace = HDMI_COLORSPACE_RGB;
+
+	/* Set up colorimetry */
+	if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
+		switch (hdmi->hdmi_data.enc_out_encoding) {
+		case V4L2_YCBCR_ENC_601:
+			if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
+				frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+			else
+				frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+			frame.extended_colorimetry =
+					HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+			break;
+		case V4L2_YCBCR_ENC_709:
+			if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
+				frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+			else
+				frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
+			frame.extended_colorimetry =
+					HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
+			break;
+		case V4L2_YCBCR_ENC_BT2020:
+			if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_BT2020)
+				frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+			else
+				frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
+			frame.extended_colorimetry =
+					HDMI_EXTENDED_COLORIMETRY_BT2020;
+			break;
+		default: /* Carries no data */
+			frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+			frame.extended_colorimetry =
+					HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+			break;
+		}
+	} else {
+		if (hdmi->hdmi_data.enc_out_encoding == V4L2_YCBCR_ENC_BT2020) {
+			frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+			frame.extended_colorimetry =
+				HDMI_EXTENDED_COLORIMETRY_BT2020;
+		} else {
+			frame.colorimetry = HDMI_COLORIMETRY_NONE;
+			frame.extended_colorimetry =
+				HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+		}
+	}
+
+	frame.scan_mode = HDMI_SCAN_MODE_NONE;
+	frame.video_code = hdmi->vic;
+
+	hdmi_avi_infoframe_pack_only(&frame, buff, 17);
+
+	/* mode which vic >= 128 must use avi version 3 */
+	if (hdmi->vic >= 128) {
+		frame.version = 3;
+		buff[1] = frame.version;
+		buff[4] &= 0x1f;
+		buff[4] |= ((frame.colorspace & 0x7) << 5);
+		buff[7] = frame.video_code;
+		hdmi_infoframe_set_checksum(buff, 17);
+	}
+
+	/*
+	 * The Designware IP uses a different byte format from standard
+	 * AVI info frames, though generally the bits are in the correct
+	 * bytes.
+	 */
+
+	val = (frame.version << 8) | (frame.length << 16);
+	hdmi_writel(hdmi, val, PKT_AVI_CONTENTS0);
+
+	for (i = 0; i < 4; i++) {
+		for (j = 0; j < 4; j++) {
+			if (i * 4 + j >= 14)
+				break;
+			if (!j)
+				val = buff[i * 4 + j + 3];
+			val |= buff[i * 4 + j + 3] << (8 * j);
+		}
+
+		hdmi_writel(hdmi, val, PKT_AVI_CONTENTS1 + i * 4);
+	}
+
+	hdmi_modb(hdmi, 0, PKTSCHED_AVI_FIELDRATE, PKTSCHED_PKT_CONFIG1);
+
+	hdmi_modb(hdmi, PKTSCHED_AVI_TX_EN | PKTSCHED_GCP_TX_EN,
+		  PKTSCHED_AVI_TX_EN | PKTSCHED_GCP_TX_EN,
+		  PKTSCHED_PKT_EN);
+}
+
+static void hdmi_config_CVTEM(struct dw_hdmi_qp *hdmi)
+{
+	u8 ds_type = 0;
+	u8 sync = 1;
+	u8 vfr = 1;
+	u8 afr = 0;
+	u8 new = 1;
+	u8 end = 0;
+	u8 data_set_length = 136;
+	u8 hb1[6] = { 0x80, 0, 0, 0, 0, 0x40 };
+	u8 *pps_body;
+	u32 val, i, reg;
+	struct drm_display_mode *mode = &hdmi->previous_mode;
+	int hsync, hfront, hback;
+	struct dw_hdmi_link_config *link_cfg;
+	void *data = hdmi->plat_data->phy_data;
+
+	hdmi_modb(hdmi, 0, PKTSCHED_EMP_CVTEM_TX_EN, PKTSCHED_PKT_EN);
+
+	if (hdmi->plat_data->get_link_cfg) {
+		link_cfg = hdmi->plat_data->get_link_cfg(data);
+	} else {
+		dev_err(hdmi->dev, "can't get frl link cfg\n");
+		return;
+	}
+
+	if (!link_cfg->dsc_mode) {
+		dev_info(hdmi->dev, "don't use dsc mode\n");
+		return;
+	}
+
+	pps_body = link_cfg->pps_payload;
+
+	hsync = mode->hsync_end - mode->hsync_start;
+	hback = mode->htotal - mode->hsync_end;
+	hfront = mode->hsync_start - mode->hdisplay;
+
+	for (i = 0; i < 6; i++) {
+		val = i << 16 | hb1[i] << 8;
+		hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS0 + i * 0x20);
+	}
+
+	val = new << 7 | end << 6 | ds_type << 4 | afr << 3 |
+	      vfr << 2 | sync << 1;
+	hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS1);
+
+	val = data_set_length << 16 | pps_body[0] << 24;
+	hdmi_writel(hdmi, val, PKT0_EMP_CVTEM_CONTENTS2);
+
+	reg = PKT0_EMP_CVTEM_CONTENTS3;
+	for (i = 1; i < 125; i++) {
+		if (reg == PKT1_EMP_CVTEM_CONTENTS0 ||
+		    reg == PKT2_EMP_CVTEM_CONTENTS0 ||
+		    reg == PKT3_EMP_CVTEM_CONTENTS0 ||
+		    reg == PKT4_EMP_CVTEM_CONTENTS0 ||
+		    reg == PKT5_EMP_CVTEM_CONTENTS0) {
+			reg += 4;
+			i--;
+			continue;
+		}
+		if (i % 4 == 1)
+			val = pps_body[i];
+		if (i % 4 == 2)
+			val |= pps_body[i] << 8;
+		if (i % 4 == 3)
+			val |= pps_body[i] << 16;
+		if (!(i % 4)) {
+			val |= pps_body[i] << 24;
+			hdmi_writel(hdmi, val, reg);
+			reg += 4;
+		}
+	}
+
+	val = (hfront & 0xff) << 24 | pps_body[127] << 16 |
+	      pps_body[126] << 8 | pps_body[125];
+	hdmi_writel(hdmi, val, PKT4_EMP_CVTEM_CONTENTS6);
+
+	val = (hback & 0xff) << 24 | ((hsync >> 8) & 0xff) << 16 |
+	      (hsync & 0xff) << 8 | ((hfront >> 8) & 0xff);
+	hdmi_writel(hdmi, val, PKT4_EMP_CVTEM_CONTENTS7);
+
+	val = link_cfg->hcactive << 8 | ((hback >> 8) & 0xff);
+	hdmi_writel(hdmi, val, PKT5_EMP_CVTEM_CONTENTS1);
+
+	for (i = PKT5_EMP_CVTEM_CONTENTS2; i <= PKT5_EMP_CVTEM_CONTENTS7; i += 4)
+		hdmi_writel(hdmi, 0, i);
+
+	hdmi_modb(hdmi, PKTSCHED_EMP_CVTEM_TX_EN, PKTSCHED_EMP_CVTEM_TX_EN,
+		  PKTSCHED_PKT_EN);
+}
+
+static void hdmi_config_drm_infoframe(struct dw_hdmi_qp *hdmi,
+				      const struct drm_connector *connector)
+{
+	const struct drm_connector_state *conn_state = connector->state;
+	struct hdr_output_metadata *hdr_metadata;
+	struct hdmi_drm_infoframe frame;
+	u8 buffer[30];
+	ssize_t err;
+	int i;
+	u32 val;
+
+	if (!hdmi->plat_data->use_drm_infoframe)
+		return;
+
+	hdmi_modb(hdmi, 0, PKTSCHED_DRMI_TX_EN, PKTSCHED_PKT_EN);
+
+	if (!hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf) {
+		DRM_DEBUG("No need to set HDR metadata in infoframe\n");
+		return;
+	}
+
+	if (!conn_state->hdr_output_metadata) {
+		DRM_DEBUG("source metadata not set yet\n");
+		return;
+	}
+
+	hdr_metadata = (struct hdr_output_metadata *)
+		conn_state->hdr_output_metadata->data;
+
+	if (!(hdmi->connector.hdr_sink_metadata.hdmi_type1.eotf &
+	      BIT(hdr_metadata->hdmi_metadata_type1.eotf))) {
+		DRM_ERROR("Not support EOTF %d\n",
+			  hdr_metadata->hdmi_metadata_type1.eotf);
+		return;
+	}
+
+	err = drm_hdmi_infoframe_set_hdr_metadata(&frame, conn_state);
+	if (err < 0)
+		return;
+
+	err = hdmi_drm_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		dev_err(hdmi->dev, "Failed to pack drm infoframe: %zd\n", err);
+		return;
+	}
+
+	val = (frame.version << 8) | (frame.length << 16);
+	hdmi_writel(hdmi, val, PKT_DRMI_CONTENTS0);
+
+	for (i = 0; i <= frame.length; i++) {
+		if (i % 4 == 0)
+			val = buffer[3 + i];
+		val |= buffer[3 + i] << ((i % 4) * 8);
+
+		if (i % 4 == 3 || (i == (frame.length)))
+			hdmi_writel(hdmi, val, PKT_DRMI_CONTENTS1 + ((i / 4) * 4));
+	}
+
+	hdmi_modb(hdmi, 0, PKTSCHED_DRMI_FIELDRATE, PKTSCHED_PKT_CONFIG1);
+
+	hdmi_modb(hdmi, PKTSCHED_DRMI_TX_EN, PKTSCHED_DRMI_TX_EN, PKTSCHED_PKT_EN);
+
+	DRM_DEBUG("%s eotf %d end\n", __func__,
+		  hdr_metadata->hdmi_metadata_type1.eotf);
+}
+
+/* Filter out invalid setups to avoid configuring SCDC and scrambling */
+static bool dw_hdmi_support_scdc(struct dw_hdmi_qp *hdmi,
+				 const struct drm_display_info *display)
+{
+	/* Disable if no DDC bus */
+	if (!hdmi->ddc)
+		return false;
+
+	/* Disable if SCDC is not supported, or if an HF-VSDB block is absent */
+	if (!display->hdmi.scdc.supported ||
+	    !display->hdmi.scdc.scrambling.supported)
+		return false;
+
+	/*
+	 * Disable if display only support low TMDS rates and scrambling
+	 * for low rates is not supported either
+	 */
+	if (!display->hdmi.scdc.scrambling.low_rates &&
+	    display->max_tmds_clock <= 340000)
+		return false;
+
+	return true;
+}
+
+static int hdmi_set_frl_mask(int frl_rate)
+{
+	switch (frl_rate) {
+	case 48:
+		return FRL_12GBPS_4LANE;
+	case 40:
+		return FRL_10GBPS_4LANE;
+	case 32:
+		return FRL_8GBPS_4LANE;
+	case 24:
+		return FRL_6GBPS_4LANE;
+	case 18:
+		return FRL_6GBPS_3LANE;
+	case 9:
+		return FRL_3GBPS_3LANE;
+	}
+
+	return 0;
+}
+
+static int hdmi_start_flt(struct dw_hdmi_qp *hdmi, u8 rate)
+{
+	u8 val;
+	u8 ffe_lv = 0;
+	int i = 0, stat;
+
+	/* FLT_READY & FFE_LEVELS read */
+	for (i = 0; i < 20; i++) {
+		drm_scdc_readb(hdmi->ddc, SCDC_STATUS_FLAGS_0, &val);
+		if (val & BIT(6))
+			break;
+		msleep(20);
+	}
+
+	if (i == 20) {
+		dev_err(hdmi->dev, "sink flt isn't ready\n");
+		return -EINVAL;
+	}
+
+	hdmi_modb(hdmi, SCDC_UPD_FLAGS_RD_IRQ, SCDC_UPD_FLAGS_RD_IRQ,
+		  MAINUNIT_1_INT_MASK_N);
+	hdmi_modb(hdmi, SCDC_UPD_FLAGS_POLL_EN | SCDC_UPD_FLAGS_AUTO_CLR,
+		  SCDC_UPD_FLAGS_POLL_EN | SCDC_UPD_FLAGS_AUTO_CLR,
+		  SCDC_CONFIG0);
+
+	/* max ffe level 3 */
+	val = 3 << 4 | hdmi_set_frl_mask(rate);
+	drm_scdc_writeb(hdmi->ddc, 0x31, val);
+
+	/* select FRL_RATE & FFE_LEVELS */
+	hdmi_writel(hdmi, ffe_lv, FLT_CONFIG0);
+
+	/* Start LTS_3 state in source DUT */
+	reinit_completion(&hdmi->flt_cmp);
+	hdmi_modb(hdmi, FLT_EXIT_TO_LTSP_IRQ, FLT_EXIT_TO_LTSP_IRQ,
+		  MAINUNIT_1_INT_MASK_N);
+	hdmi_writel(hdmi, 1, FLT_CONTROL0);
+
+	/* wait for completed link training at source side */
+	stat = wait_for_completion_timeout(&hdmi->flt_cmp, HZ * 2);
+	if (!stat) {
+		dev_err(hdmi->dev, "wait lts3 finish time out\n");
+		hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_POLL_EN |
+			  SCDC_UPD_FLAGS_AUTO_CLR, SCDC_CONFIG0);
+		hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_RD_IRQ,
+			  MAINUNIT_1_INT_MASK_N);
+		return -EAGAIN;
+	}
+
+	if (!(hdmi->flt_intr & FLT_EXIT_TO_LTSP_IRQ)) {
+		dev_err(hdmi->dev, "not to ltsp\n");
+		hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_POLL_EN |
+			  SCDC_UPD_FLAGS_AUTO_CLR, SCDC_CONFIG0);
+		hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_RD_IRQ,
+			  MAINUNIT_1_INT_MASK_N);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define HDMI_MODE_FRL_MASK     BIT(30)
+
+static void hdmi_set_op_mode(struct dw_hdmi_qp *hdmi,
+			     struct dw_hdmi_link_config *link_cfg,
+			     const struct drm_connector *connector)
+{
+	int frl_rate;
+	int i;
+
+	/* set sink frl mode disable and wait sink ready */
+	hdmi_writel(hdmi, 0, FLT_CONFIG0);
+	if (dw_hdmi_support_scdc(hdmi, &connector->display_info))
+		drm_scdc_writeb(hdmi->ddc, 0x31, 0);
+	/*
+	 * some TVs must wait a while before switching frl mode resolution,
+	 * or the signal may not be recognized.
+	 */
+	msleep(200);
+
+	if (!link_cfg->frl_mode) {
+		dev_info(hdmi->dev, "dw hdmi qp use tmds mode\n");
+		hdmi_modb(hdmi, 0, OPMODE_FRL, LINK_CONFIG0);
+		hdmi_modb(hdmi, 0, OPMODE_FRL_4LANES, LINK_CONFIG0);
+		return;
+	}
+
+	if (link_cfg->frl_lanes == 4)
+		hdmi_modb(hdmi, OPMODE_FRL_4LANES, OPMODE_FRL_4LANES,
+			  LINK_CONFIG0);
+	else
+		hdmi_modb(hdmi, 0, OPMODE_FRL_4LANES, LINK_CONFIG0);
+
+	hdmi_modb(hdmi, 1, OPMODE_FRL, LINK_CONFIG0);
+
+	frl_rate = link_cfg->frl_lanes * link_cfg->rate_per_lane;
+	hdmi_start_flt(hdmi, frl_rate);
+
+	for (i = 0; i < 50; i++) {
+		hdmi_modb(hdmi, PKTSCHED_NULL_TX_EN, PKTSCHED_NULL_TX_EN, PKTSCHED_PKT_EN);
+		mdelay(1);
+		hdmi_modb(hdmi, 0, PKTSCHED_NULL_TX_EN, PKTSCHED_PKT_EN);
+	}
+}
+
+static unsigned long
+hdmi_get_tmdsclock(struct dw_hdmi_qp *hdmi, unsigned long mpixelclock)
+{
+	unsigned long tmdsclock = mpixelclock;
+	unsigned int depth =
+		hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format);
+
+	if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) {
+		switch (depth) {
+		case 16:
+			tmdsclock = mpixelclock * 2;
+			break;
+		case 12:
+			tmdsclock = mpixelclock * 3 / 2;
+			break;
+		case 10:
+			tmdsclock = mpixelclock * 5 / 4;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return tmdsclock;
+}
+
+//[CC:] is connector param different from hdmi->connector?
+//[CC:] probably it possible to hook the whole implementation into dw-hdmi.c
+static int dw_hdmi_qp_setup(struct dw_hdmi_qp *hdmi,
+			    struct drm_connector *connector,
+			    struct drm_display_mode *mode)
+{
+	int ret;
+	void *data = hdmi->plat_data->phy_data;
+	struct hdmi_vmode_qp *vmode = &hdmi->hdmi_data.video_mode;
+	struct dw_hdmi_link_config *link_cfg;
+	u8 bytes = 0;
+
+	hdmi->vic = drm_match_cea_mode(mode);
+	if (!hdmi->vic)
+		dev_dbg(hdmi->dev, "Non-CEA mode used in HDMI\n");
+	else
+		dev_dbg(hdmi->dev, "CEA mode used vic=%d\n", hdmi->vic);
+
+	if (hdmi->plat_data->get_enc_out_encoding)
+		hdmi->hdmi_data.enc_out_encoding =
+			hdmi->plat_data->get_enc_out_encoding(data);
+	else if ((hdmi->vic == 6) || (hdmi->vic == 7) ||
+		 (hdmi->vic == 21) || (hdmi->vic == 22) ||
+		 (hdmi->vic == 2) || (hdmi->vic == 3) ||
+		 (hdmi->vic == 17) || (hdmi->vic == 18))
+		hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_601;
+	else
+		hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_709;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK) {
+		hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 1;
+		hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 1;
+	} else {
+		hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 0;
+		hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 0;
+	}
+	/*  Get input format from plat data or fallback to RGB888 */
+	if (hdmi->plat_data->get_input_bus_format)
+		hdmi->hdmi_data.enc_in_bus_format =
+			hdmi->plat_data->get_input_bus_format(data);
+	else if (hdmi->plat_data->input_bus_format)
+		hdmi->hdmi_data.enc_in_bus_format =
+			hdmi->plat_data->input_bus_format;
+	else
+		hdmi->hdmi_data.enc_in_bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+
+	/* Default to RGB888 output format */
+	if (hdmi->plat_data->get_output_bus_format)
+		hdmi->hdmi_data.enc_out_bus_format =
+			hdmi->plat_data->get_output_bus_format(data);
+	else
+		hdmi->hdmi_data.enc_out_bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+
+	/* Get input encoding from plat data or fallback to none */
+	if (hdmi->plat_data->get_enc_in_encoding)
+		hdmi->hdmi_data.enc_in_encoding =
+			hdmi->plat_data->get_enc_in_encoding(data);
+	else if (hdmi->plat_data->input_bus_encoding)
+		hdmi->hdmi_data.enc_in_encoding =
+			hdmi->plat_data->input_bus_encoding;
+	else
+		hdmi->hdmi_data.enc_in_encoding = V4L2_YCBCR_ENC_DEFAULT;
+
+	if (hdmi->plat_data->get_quant_range)
+		hdmi->hdmi_data.quant_range =
+			hdmi->plat_data->get_quant_range(data);
+	else
+		hdmi->hdmi_data.quant_range = HDMI_QUANTIZATION_RANGE_DEFAULT;
+
+	if (hdmi->plat_data->get_link_cfg)
+		link_cfg = hdmi->plat_data->get_link_cfg(data);
+	else
+		return -EINVAL;
+
+	hdmi->phy.ops->set_mode(hdmi, hdmi->phy.data, HDMI_MODE_FRL_MASK,
+				link_cfg->frl_mode);
+
+	/*
+	 * According to the dw-hdmi specification 6.4.2
+	 * vp_pr_cd[3:0]:
+	 * 0000b: No pixel repetition (pixel sent only once)
+	 * 0001b: Pixel sent two times (pixel repeated once)
+	 */
+	hdmi->hdmi_data.pix_repet_factor =
+		(mode->flags & DRM_MODE_FLAG_DBLCLK) ? 1 : 0;
+	hdmi->hdmi_data.video_mode.mdataenablepolarity = true;
+
+	vmode->previous_pixelclock = vmode->mpixelclock;
+	//[CC:] no split mode
+	// if (hdmi->plat_data->split_mode)
+	// 	mode->crtc_clock /= 2;
+	vmode->mpixelclock = mode->crtc_clock * 1000;
+	if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
+		vmode->mpixelclock *= 2;
+	dev_dbg(hdmi->dev, "final pixclk = %ld\n", vmode->mpixelclock);
+	vmode->previous_tmdsclock = vmode->mtmdsclock;
+	vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi, vmode->mpixelclock);
+	if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format))
+		vmode->mtmdsclock /= 2;
+	dev_info(hdmi->dev, "final tmdsclk = %d\n", vmode->mtmdsclock);
+
+	ret = hdmi->phy.ops->init(hdmi, hdmi->phy.data, &hdmi->previous_mode);
+	if (ret)
+		return ret;
+
+	if (hdmi->plat_data->set_grf_cfg)
+		hdmi->plat_data->set_grf_cfg(data);
+
+	/* not for DVI mode */
+	if (hdmi->sink_is_hdmi) {
+		dev_dbg(hdmi->dev, "%s HDMI mode\n", __func__);
+		hdmi_modb(hdmi, 0, OPMODE_DVI, LINK_CONFIG0);
+		hdmi_modb(hdmi, HDCP2_BYPASS, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0);
+		if (!link_cfg->frl_mode) {
+			if (vmode->mtmdsclock > HDMI14_MAX_TMDSCLK) {
+				drm_scdc_readb(hdmi->ddc, SCDC_SINK_VERSION, &bytes);
+				drm_scdc_writeb(hdmi->ddc, SCDC_SOURCE_VERSION,
+						min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
+				//[CC:] use dw_hdmi_set_high_tmds_clock_ratio()
+				drm_scdc_set_high_tmds_clock_ratio(connector, 1);
+				drm_scdc_set_scrambling(connector, 1);
+				hdmi_writel(hdmi, 1, SCRAMB_CONFIG0);
+			} else {
+				if (dw_hdmi_support_scdc(hdmi, &connector->display_info)) {
+					drm_scdc_set_high_tmds_clock_ratio(connector, 0);
+					drm_scdc_set_scrambling(connector, 0);
+				}
+				hdmi_writel(hdmi, 0, SCRAMB_CONFIG0);
+			}
+		}
+		/* HDMI Initialization Step F - Configure AVI InfoFrame */
+		hdmi_config_AVI(hdmi, connector, mode);
+		hdmi_config_CVTEM(hdmi);
+		hdmi_config_drm_infoframe(hdmi, connector);
+		hdmi_set_op_mode(hdmi, link_cfg, connector);
+	} else {
+		hdmi_modb(hdmi, HDCP2_BYPASS, HDCP2_BYPASS, HDCP2LOGIC_CONFIG0);
+		hdmi_modb(hdmi, OPMODE_DVI, OPMODE_DVI, LINK_CONFIG0);
+		dev_info(hdmi->dev, "%s DVI mode\n", __func__);
+	}
+
+	return 0;
+}
+
+static enum drm_connector_status
+dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	struct dw_hdmi_qp *secondary = NULL;
+	enum drm_connector_status result, result_secondary;
+
+	mutex_lock(&hdmi->mutex);
+	hdmi->force = DRM_FORCE_UNSPECIFIED;
+	mutex_unlock(&hdmi->mutex);
+
+	if (hdmi->plat_data->left)
+		secondary = hdmi->plat_data->left;
+	else if (hdmi->plat_data->right)
+		secondary = hdmi->plat_data->right;
+
+	result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+
+	if (secondary) {
+		result_secondary = secondary->phy.ops->read_hpd(secondary, secondary->phy.data);
+		if (result == connector_status_connected &&
+		    result_secondary == connector_status_connected)
+			result = connector_status_connected;
+		else
+			result = connector_status_disconnected;
+	}
+
+	mutex_lock(&hdmi->mutex);
+	if (result != hdmi->last_connector_result) {
+		dev_dbg(hdmi->dev, "read_hpd result: %d", result);
+		handle_plugged_change(hdmi,
+				      result == connector_status_connected);
+		hdmi->last_connector_result = result;
+	}
+	mutex_unlock(&hdmi->mutex);
+
+	return result;
+}
+
+static int
+dw_hdmi_update_hdr_property(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct dw_hdmi_qp *hdmi = container_of(connector, struct dw_hdmi_qp,
+					       connector);
+	void *data = hdmi->plat_data->phy_data;
+	const struct hdr_static_metadata *metadata =
+		&connector->hdr_sink_metadata.hdmi_type1;
+	size_t size = sizeof(*metadata);
+	struct drm_property *property = NULL;
+	struct drm_property_blob *blob;
+	int ret;
+
+	if (hdmi->plat_data->get_hdr_property)
+		property = hdmi->plat_data->get_hdr_property(data);
+
+	if (!property)
+		return -EINVAL;
+
+	if (hdmi->plat_data->get_hdr_blob)
+		blob = hdmi->plat_data->get_hdr_blob(data);
+	else
+		return -EINVAL;
+
+	ret = drm_property_replace_global_blob(dev, &blob, size, metadata,
+					       &connector->base, property);
+	return ret;
+}
+
+static int dw_hdmi_connector_get_modes(struct drm_connector *connector)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	struct hdr_static_metadata *metedata =
+		&connector->hdr_sink_metadata.hdmi_type1;
+	struct edid *edid;
+	struct drm_display_mode *mode;
+	struct drm_display_info *info = &connector->display_info;
+	// void *data = hdmi->plat_data->phy_data;
+	int i, ret = 0;
+
+	if (!hdmi->ddc)
+		return 0;
+
+	memset(metedata, 0, sizeof(*metedata));
+	edid = drm_get_edid(connector, hdmi->ddc);
+	if (edid) {
+		dev_dbg(hdmi->dev, "got edid: width[%d] x height[%d]\n",
+			edid->width_cm, edid->height_cm);
+
+		hdmi->sink_is_hdmi = drm_detect_hdmi_monitor(edid);
+		drm_connector_update_edid_property(connector, edid);
+		// if (hdmi->plat_data->get_edid_dsc_info)
+		// 	hdmi->plat_data->get_edid_dsc_info(data, edid);
+		ret = drm_add_edid_modes(connector, edid);
+		dw_hdmi_update_hdr_property(connector);
+		// if (ret > 0 && hdmi->plat_data->split_mode) {
+		// 	struct dw_hdmi_qp *secondary = NULL;
+		// 	void *secondary_data;
+		//
+		// 	if (hdmi->plat_data->left)
+		// 		secondary = hdmi->plat_data->left;
+		// 	else if (hdmi->plat_data->right)
+		// 		secondary = hdmi->plat_data->right;
+		//
+		// 	if (!secondary)
+		// 		return -ENOMEM;
+		// 	secondary_data = secondary->plat_data->phy_data;
+		//
+		// 	list_for_each_entry(mode, &connector->probed_modes, head)
+		// 		hdmi->plat_data->convert_to_split_mode(mode);
+		//
+		// 	secondary->sink_is_hdmi = drm_detect_hdmi_monitor(edid);
+		// 	if (secondary->plat_data->get_edid_dsc_info)
+		// 		secondary->plat_data->get_edid_dsc_info(secondary_data, edid);
+		// }
+		kfree(edid);
+	} else {
+		hdmi->sink_is_hdmi = true;
+
+		if (hdmi->plat_data->split_mode) {
+			if (hdmi->plat_data->left) {
+				hdmi->plat_data->left->sink_is_hdmi = true;
+			} else if (hdmi->plat_data->right) {
+				hdmi->plat_data->right->sink_is_hdmi = true;
+			}
+		}
+
+		for (i = 0; i < ARRAY_SIZE(dw_hdmi_default_modes); i++) {
+			const struct drm_display_mode *ptr =
+				&dw_hdmi_default_modes[i];
+
+			mode = drm_mode_duplicate(connector->dev, ptr);
+			if (mode) {
+				if (!i)
+					mode->type = DRM_MODE_TYPE_PREFERRED;
+				drm_mode_probed_add(connector, mode);
+				ret++;
+			}
+		}
+		if (ret > 0 && hdmi->plat_data->split_mode) {
+			struct drm_display_mode *mode;
+
+			list_for_each_entry(mode, &connector->probed_modes, head)
+				hdmi->plat_data->convert_to_split_mode(mode);
+		}
+		info->edid_hdmi_rgb444_dc_modes = 0;
+		info->hdmi.y420_dc_modes = 0;
+		info->color_formats = 0;
+
+		dev_info(hdmi->dev, "failed to get edid\n");
+	}
+
+	return ret;
+}
+
+static int
+dw_hdmi_atomic_connector_set_property(struct drm_connector *connector,
+				      struct drm_connector_state *state,
+				      struct drm_property *property,
+				      uint64_t val)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	const struct dw_hdmi_property_ops *ops = hdmi->plat_data->property_ops;
+
+	if (ops && ops->set_property)
+		return ops->set_property(connector, state, property,
+					 val, hdmi->plat_data->phy_data);
+	else
+		return -EINVAL;
+}
+
+static int
+dw_hdmi_atomic_connector_get_property(struct drm_connector *connector,
+				      const struct drm_connector_state *state,
+				      struct drm_property *property,
+				      uint64_t *val)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	const struct dw_hdmi_property_ops *ops = hdmi->plat_data->property_ops;
+
+	if (ops && ops->get_property)
+		return ops->get_property(connector, state, property,
+					 val, hdmi->plat_data->phy_data);
+	else
+		return -EINVAL;
+}
+
+static int
+dw_hdmi_connector_set_property(struct drm_connector *connector,
+			       struct drm_property *property, uint64_t val)
+{
+	return dw_hdmi_atomic_connector_set_property(connector, NULL,
+						     property, val);
+}
+
+static void dw_hdmi_attach_properties(struct dw_hdmi_qp *hdmi)
+{
+	unsigned int color = MEDIA_BUS_FMT_RGB888_1X24;
+	const struct dw_hdmi_property_ops *ops =
+				hdmi->plat_data->property_ops;
+
+	if (ops && ops->attach_properties)
+		return ops->attach_properties(&hdmi->connector, color, 0,
+					      hdmi->plat_data->phy_data);
+}
+
+static void dw_hdmi_destroy_properties(struct dw_hdmi_qp *hdmi)
+{
+	const struct dw_hdmi_property_ops *ops =
+				hdmi->plat_data->property_ops;
+
+	if (ops && ops->destroy_properties)
+		return ops->destroy_properties(&hdmi->connector,
+					       hdmi->plat_data->phy_data);
+}
+
+static struct drm_encoder *
+dw_hdmi_connector_best_encoder(struct drm_connector *connector)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+
+	return hdmi->bridge.encoder;
+}
+
+static bool dw_hdmi_color_changed(struct drm_connector *connector,
+				  struct drm_atomic_state *state)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	void *data = hdmi->plat_data->phy_data;
+	struct drm_connector_state *old_state =
+		drm_atomic_get_old_connector_state(state, connector);
+	struct drm_connector_state *new_state =
+		drm_atomic_get_new_connector_state(state, connector);
+	bool ret = false;
+
+	if (hdmi->plat_data->get_color_changed)
+		ret = hdmi->plat_data->get_color_changed(data);
+
+	if (new_state->colorspace != old_state->colorspace)
+		ret = true;
+
+	return ret;
+}
+
+static bool hdr_metadata_equal(const struct drm_connector_state *old_state,
+			       const struct drm_connector_state *new_state)
+{
+	struct drm_property_blob *old_blob = old_state->hdr_output_metadata;
+	struct drm_property_blob *new_blob = new_state->hdr_output_metadata;
+
+	if (!old_blob || !new_blob)
+		return old_blob == new_blob;
+
+	if (old_blob->length != new_blob->length)
+		return false;
+
+	return !memcmp(old_blob->data, new_blob->data, old_blob->length);
+}
+
+static int dw_hdmi_connector_atomic_check(struct drm_connector *connector,
+					  struct drm_atomic_state *state)
+{
+	struct drm_connector_state *old_state =
+		drm_atomic_get_old_connector_state(state, connector);
+	struct drm_connector_state *new_state =
+		drm_atomic_get_new_connector_state(state, connector);
+	struct drm_crtc *crtc = new_state->crtc;
+	struct drm_crtc_state *crtc_state;
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+	struct drm_display_mode *mode = NULL;
+	void *data = hdmi->plat_data->phy_data;
+	struct hdmi_vmode_qp *vmode = &hdmi->hdmi_data.video_mode;
+
+	if (!crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	/*
+	 * If HDMI is enabled in uboot, it's need to record
+	 * drm_display_mode and set phy status to enabled.
+	 */
+	if (!vmode->mpixelclock) {
+		crtc_state = drm_atomic_get_crtc_state(state, crtc);
+		if (hdmi->plat_data->get_enc_in_encoding)
+			hdmi->hdmi_data.enc_in_encoding =
+				hdmi->plat_data->get_enc_in_encoding(data);
+		if (hdmi->plat_data->get_enc_out_encoding)
+			hdmi->hdmi_data.enc_out_encoding =
+				hdmi->plat_data->get_enc_out_encoding(data);
+		if (hdmi->plat_data->get_input_bus_format)
+			hdmi->hdmi_data.enc_in_bus_format =
+				hdmi->plat_data->get_input_bus_format(data);
+		if (hdmi->plat_data->get_output_bus_format)
+			hdmi->hdmi_data.enc_out_bus_format =
+				hdmi->plat_data->get_output_bus_format(data);
+
+		mode = &crtc_state->mode;
+		if (hdmi->plat_data->split_mode) {
+			hdmi->plat_data->convert_to_origin_mode(mode);
+			mode->crtc_clock /= 2;
+		}
+		memcpy(&hdmi->previous_mode, mode, sizeof(hdmi->previous_mode));
+		vmode->mpixelclock = mode->crtc_clock * 1000;
+		vmode->previous_pixelclock = mode->clock;
+		vmode->previous_tmdsclock = mode->clock;
+		vmode->mtmdsclock = hdmi_get_tmdsclock(hdmi,
+						       vmode->mpixelclock);
+		if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format))
+			vmode->mtmdsclock /= 2;
+	}
+
+	if (!hdr_metadata_equal(old_state, new_state) ||
+	    dw_hdmi_color_changed(connector, state)) {
+		crtc_state = drm_atomic_get_crtc_state(state, crtc);
+		if (IS_ERR(crtc_state))
+			return PTR_ERR(crtc_state);
+
+		crtc_state->mode_changed = true;
+	}
+
+	return 0;
+}
+
+static void dw_hdmi_connector_force(struct drm_connector *connector)
+{
+	struct dw_hdmi_qp *hdmi =
+		container_of(connector, struct dw_hdmi_qp, connector);
+
+	mutex_lock(&hdmi->mutex);
+
+	if (hdmi->force != connector->force) {
+		if (!hdmi->disabled && connector->force == DRM_FORCE_OFF)
+			extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI,
+					      false);
+		else if (hdmi->disabled && connector->force == DRM_FORCE_ON)
+			extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI,
+					      true);
+	}
+
+	hdmi->force = connector->force;
+	mutex_unlock(&hdmi->mutex);
+}
+
+static int dw_hdmi_qp_fill_modes(struct drm_connector *connector, u32 max_x,
+								 u32 max_y)
+{
+	return drm_helper_probe_single_connector_modes(connector, 9000, 9000);
+}
+
+static const struct drm_connector_funcs dw_hdmi_connector_funcs = {
+	.fill_modes = dw_hdmi_qp_fill_modes,
+	.detect = dw_hdmi_connector_detect,
+	.destroy = drm_connector_cleanup,
+	.force = dw_hdmi_connector_force,
+	.reset = drm_atomic_helper_connector_reset,
+	.set_property = dw_hdmi_connector_set_property,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+	.atomic_set_property = dw_hdmi_atomic_connector_set_property,
+	.atomic_get_property = dw_hdmi_atomic_connector_get_property,
+};
+
+static const struct drm_connector_helper_funcs dw_hdmi_connector_helper_funcs = {
+	.get_modes = dw_hdmi_connector_get_modes,
+	.best_encoder = dw_hdmi_connector_best_encoder,
+	.atomic_check = dw_hdmi_connector_atomic_check,
+};
+
+static int dw_hdmi_qp_bridge_attach(struct drm_bridge *bridge,
+				    enum drm_bridge_attach_flags flags)
+{
+	struct dw_hdmi_qp *hdmi = bridge->driver_private;
+	struct drm_encoder *encoder = bridge->encoder;
+	struct drm_connector *connector = &hdmi->connector;
+
+	if (flags & DRM_BRIDGE_ATTACH_NO_CONNECTOR)
+		return 0;
+
+	connector->interlace_allowed = 1;
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+	drm_connector_helper_add(connector, &dw_hdmi_connector_helper_funcs);
+
+	// [CC:] use drm_connector_init_with_ddc or drmm_connector_init
+	// to provide ddc reference
+	drm_connector_init_with_ddc(bridge->dev, connector,
+				    &dw_hdmi_connector_funcs,
+				    DRM_MODE_CONNECTOR_HDMIA,
+				    hdmi->ddc);
+
+	drm_connector_attach_encoder(connector, encoder);
+	dw_hdmi_attach_properties(hdmi);
+
+	return 0;
+}
+
+static void dw_hdmi_qp_bridge_mode_set(struct drm_bridge *bridge,
+				       const struct drm_display_mode *orig_mode,
+				       const struct drm_display_mode *mode)
+{
+	struct dw_hdmi_qp *hdmi = bridge->driver_private;
+
+	mutex_lock(&hdmi->mutex);
+
+	/* Store the display mode for plugin/DKMS poweron events */
+	memcpy(&hdmi->previous_mode, mode, sizeof(hdmi->previous_mode));
+	if (hdmi->plat_data->split_mode)
+		hdmi->plat_data->convert_to_origin_mode(&hdmi->previous_mode);
+
+	mutex_unlock(&hdmi->mutex);
+}
+
+static enum drm_mode_status
+dw_hdmi_qp_bridge_mode_valid(struct drm_bridge *bridge,
+			     const struct drm_display_info *info,
+			     const struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static void dw_hdmi_qp_bridge_atomic_enable(struct drm_bridge *bridge,
+					    struct drm_bridge_state *old_state)
+{
+	struct dw_hdmi_qp *hdmi = bridge->driver_private;
+	struct drm_atomic_state *state = old_state->base.state;
+	struct drm_connector *connector;
+
+	connector = drm_atomic_get_new_connector_for_encoder(state,
+							     bridge->encoder);
+
+	mutex_lock(&hdmi->mutex);
+	hdmi->curr_conn = connector;
+	dw_hdmi_qp_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode);
+	hdmi->disabled = false;
+	mutex_unlock(&hdmi->mutex);
+
+	extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, true);
+	handle_plugged_change(hdmi, true);
+}
+
+static void dw_hdmi_qp_bridge_atomic_disable(struct drm_bridge *bridge,
+					     struct drm_bridge_state *old_state)
+{
+	struct dw_hdmi_qp *hdmi = bridge->driver_private;
+
+	extcon_set_state_sync(hdmi->extcon, EXTCON_DISP_HDMI, false);
+	handle_plugged_change(hdmi, false);
+	mutex_lock(&hdmi->mutex);
+
+	hdmi->curr_conn = NULL;
+
+	if (hdmi->phy.ops->disable)
+		hdmi->phy.ops->disable(hdmi, hdmi->phy.data);
+	hdmi->disabled = true;
+	mutex_unlock(&hdmi->mutex);
+}
+
+static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = {
+	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
+	.atomic_reset = drm_atomic_helper_bridge_reset,
+	.attach = dw_hdmi_qp_bridge_attach,
+	.mode_set = dw_hdmi_qp_bridge_mode_set,
+	.mode_valid = dw_hdmi_qp_bridge_mode_valid,
+	.atomic_enable = dw_hdmi_qp_bridge_atomic_enable,
+	.atomic_disable = dw_hdmi_qp_bridge_atomic_disable,
+};
+
+static irqreturn_t dw_hdmi_qp_main_hardirq(int irq, void *dev_id)
+{
+	struct dw_hdmi_qp *hdmi = dev_id;
+	struct dw_hdmi_qp_i2c *i2c = hdmi->i2c;
+	u32 stat;
+
+	stat = hdmi_readl(hdmi, MAINUNIT_1_INT_STATUS);
+
+	i2c->stat = stat & (I2CM_OP_DONE_IRQ | I2CM_READ_REQUEST_IRQ |
+			    I2CM_NACK_RCVD_IRQ);
+	hdmi->scdc_intr = stat & (SCDC_UPD_FLAGS_RD_IRQ |
+				  SCDC_UPD_FLAGS_CHG_IRQ |
+				  SCDC_UPD_FLAGS_CLR_IRQ |
+				  SCDC_RR_REPLY_STOP_IRQ |
+				  SCDC_NACK_RCVD_IRQ);
+	hdmi->flt_intr = stat & (FLT_EXIT_TO_LTSP_IRQ |
+				 FLT_EXIT_TO_LTS4_IRQ |
+				 FLT_EXIT_TO_LTSL_IRQ);
+
+	if (i2c->stat) {
+		hdmi_writel(hdmi, i2c->stat, MAINUNIT_1_INT_CLEAR);
+		complete(&i2c->cmp);
+	}
+
+	if (hdmi->flt_intr) {
+		dev_dbg(hdmi->dev, "i2c flt irq:%#x\n", hdmi->flt_intr);
+		hdmi_writel(hdmi, hdmi->flt_intr, MAINUNIT_1_INT_CLEAR);
+		complete(&hdmi->flt_cmp);
+	}
+
+	if (hdmi->scdc_intr) {
+		u8 val;
+
+		dev_dbg(hdmi->dev, "i2c scdc irq:%#x\n", hdmi->scdc_intr);
+		hdmi_writel(hdmi, hdmi->scdc_intr, MAINUNIT_1_INT_CLEAR);
+		val = hdmi_readl(hdmi, SCDC_STATUS0);
+
+		/* frl start */
+		if (val & BIT(4)) {
+			hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_POLL_EN |
+				  SCDC_UPD_FLAGS_AUTO_CLR, SCDC_CONFIG0);
+			hdmi_modb(hdmi, 0, SCDC_UPD_FLAGS_RD_IRQ,
+				  MAINUNIT_1_INT_MASK_N);
+			dev_info(hdmi->dev, "frl start\n");
+		}
+
+	}
+
+	if (stat)
+		return IRQ_HANDLED;
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t dw_hdmi_qp_avp_hardirq(int irq, void *dev_id)
+{
+	struct dw_hdmi_qp *hdmi = dev_id;
+	u32 stat;
+
+	stat = hdmi_readl(hdmi, AVP_1_INT_STATUS);
+	if (stat) {
+		dev_dbg(hdmi->dev, "HDCP irq %#x\n", stat);
+		stat &= ~stat;
+		hdmi_writel(hdmi, stat, AVP_1_INT_MASK_N);
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t dw_hdmi_qp_earc_hardirq(int irq, void *dev_id)
+{
+	struct dw_hdmi_qp *hdmi = dev_id;
+	u32 stat;
+
+	stat = hdmi_readl(hdmi, EARCRX_0_INT_STATUS);
+	if (stat) {
+		dev_dbg(hdmi->dev, "earc irq %#x\n", stat);
+		stat &= ~stat;
+		hdmi_writel(hdmi, stat, EARCRX_0_INT_MASK_N);
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t dw_hdmi_qp_avp_irq(int irq, void *dev_id)
+{
+	struct dw_hdmi_qp *hdmi = dev_id;
+	u32 stat;
+
+	stat = hdmi_readl(hdmi, AVP_1_INT_STATUS);
+
+	if (!stat)
+		return IRQ_NONE;
+
+	hdmi_writel(hdmi, stat, AVP_1_INT_CLEAR);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dw_hdmi_qp_earc_irq(int irq, void *dev_id)
+{
+	struct dw_hdmi_qp *hdmi = dev_id;
+	u32 stat;
+
+	stat = hdmi_readl(hdmi, EARCRX_0_INT_STATUS);
+
+	if (!stat)
+		return IRQ_NONE;
+
+	hdmi_writel(hdmi, stat, EARCRX_0_INT_CLEAR);
+
+	hdmi->earc_intr = stat;
+	complete(&hdmi->earc_cmp);
+
+	return IRQ_HANDLED;
+}
+
+static int dw_hdmi_detect_phy(struct dw_hdmi_qp *hdmi)
+{
+	u8 phy_type;
+
+	phy_type = hdmi->plat_data->phy_force_vendor ?
+				DW_HDMI_PHY_VENDOR_PHY : 0;
+
+	if (phy_type == DW_HDMI_PHY_VENDOR_PHY) {
+		/* Vendor PHYs require support from the glue layer. */
+		if (!hdmi->plat_data->qp_phy_ops || !hdmi->plat_data->phy_name) {
+			dev_err(hdmi->dev,
+				"Vendor HDMI PHY not supported by glue layer\n");
+			return -ENODEV;
+		}
+
+		hdmi->phy.ops = hdmi->plat_data->qp_phy_ops;
+		hdmi->phy.data = hdmi->plat_data->phy_data;
+		hdmi->phy.name = hdmi->plat_data->phy_name;
+	}
+
+	return 0;
+}
+
+static const struct regmap_config hdmi_regmap_config = {
+	.reg_bits	= 32,
+	.val_bits	= 32,
+	.reg_stride	= 4,
+	.max_register	= EARCRX_1_INT_FORCE,
+};
+
+struct dw_hdmi_qp_reg_table {
+	int reg_base;
+	int reg_end;
+};
+
+static const struct dw_hdmi_qp_reg_table hdmi_reg_table[] = {
+	{0x0, 0xc},
+	{0x14, 0x1c},
+	{0x44, 0x48},
+	{0x50, 0x58},
+	{0x80, 0x84},
+	{0xa0, 0xc4},
+	{0xe0, 0xe8},
+	{0xf0, 0x118},
+	{0x140, 0x140},
+	{0x150, 0x150},
+	{0x160, 0x168},
+	{0x180, 0x180},
+	{0x800, 0x800},
+	{0x808, 0x808},
+	{0x814, 0x814},
+	{0x81c, 0x824},
+	{0x834, 0x834},
+	{0x840, 0x864},
+	{0x86c, 0x86c},
+	{0x880, 0x89c},
+	{0x8e0, 0x8e8},
+	{0x900, 0x900},
+	{0x908, 0x90c},
+	{0x920, 0x938},
+	{0x920, 0x938},
+	{0x960, 0x960},
+	{0x968, 0x968},
+	{0xa20, 0xa20},
+	{0xa30, 0xa30},
+	{0xa40, 0xa40},
+	{0xa54, 0xa54},
+	{0xa80, 0xaac},
+	{0xab4, 0xab8},
+	{0xb00, 0xcbc},
+	{0xce0, 0xce0},
+	{0xd00, 0xddc},
+	{0xe20, 0xe24},
+	{0xe40, 0xe44},
+	{0xe4c, 0xe4c},
+	{0xe60, 0xe80},
+	{0xea0, 0xf24},
+	{0x1004, 0x100c},
+	{0x1020, 0x1030},
+	{0x1040, 0x1050},
+	{0x1060, 0x1068},
+	{0x1800, 0x1820},
+	{0x182c, 0x182c},
+	{0x1840, 0x1940},
+	{0x1960, 0x1a60},
+	{0x1b00, 0x1b00},
+	{0x1c00, 0x1c00},
+	{0x3000, 0x3000},
+	{0x3010, 0x3014},
+	{0x3020, 0x3024},
+	{0x3800, 0x3800},
+	{0x3810, 0x3814},
+	{0x3820, 0x3824},
+	{0x3830, 0x3834},
+	{0x3840, 0x3844},
+	{0x3850, 0x3854},
+	{0x3860, 0x3864},
+	{0x3870, 0x3874},
+	{0x4000, 0x4004},
+	{0x4800, 0x4800},
+	{0x4810, 0x4814},
+};
+
+static int dw_hdmi_ctrl_show(struct seq_file *s, void *v)
+{
+	struct dw_hdmi_qp *hdmi = s->private;
+	u32 i = 0, j = 0, val = 0;
+
+	seq_puts(s, "\n---------------------------------------------------");
+
+	for (i = 0; i < ARRAY_SIZE(hdmi_reg_table); i++) {
+		for (j = hdmi_reg_table[i].reg_base;
+		     j <= hdmi_reg_table[i].reg_end; j += 4) {
+			val = hdmi_readl(hdmi, j);
+
+			if ((j - hdmi_reg_table[i].reg_base) % 16 == 0)
+				seq_printf(s, "\n>>>hdmi_ctl %04x:", j);
+			seq_printf(s, " %08x", val);
+		}
+	}
+	seq_puts(s, "\n---------------------------------------------------\n");
+
+	return 0;
+}
+
+static int dw_hdmi_ctrl_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_hdmi_ctrl_show, inode->i_private);
+}
+
+static ssize_t
+dw_hdmi_ctrl_write(struct file *file, const char __user *buf,
+		   size_t count, loff_t *ppos)
+{
+	struct dw_hdmi_qp *hdmi =
+		((struct seq_file *)file->private_data)->private;
+	u32 reg, val;
+	char kbuf[25];
+
+	if (count > 24) {
+		dev_err(hdmi->dev, "out of buf range\n");
+		return count;
+	}
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+	kbuf[count - 1] = '\0';
+
+	if (sscanf(kbuf, "%x %x", &reg, &val) == -1)
+		return -EFAULT;
+	if (reg > EARCRX_1_INT_FORCE) {
+		dev_err(hdmi->dev, "it is no a hdmi register\n");
+		return count;
+	}
+	dev_info(hdmi->dev, "/**********hdmi register config******/");
+	dev_info(hdmi->dev, "\n reg=%x val=%x\n", reg, val);
+	hdmi_writel(hdmi, val, reg);
+	return count;
+}
+
+static const struct file_operations dw_hdmi_ctrl_fops = {
+	.owner = THIS_MODULE,
+	.open = dw_hdmi_ctrl_open,
+	.read = seq_read,
+	.write = dw_hdmi_ctrl_write,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int dw_hdmi_status_show(struct seq_file *s, void *v)
+{
+	struct dw_hdmi_qp *hdmi = s->private;
+	u32 val;
+
+	seq_puts(s, "PHY: ");
+	if (hdmi->disabled) {
+		seq_puts(s, "disabled\n");
+		return 0;
+	}
+	seq_puts(s, "enabled\t\t\tMode: ");
+	if (hdmi->sink_is_hdmi)
+		seq_puts(s, "HDMI\n");
+	else
+		seq_puts(s, "DVI\n");
+
+	if (hdmi->hdmi_data.video_mode.mpixelclock > 600000000) {
+		seq_printf(s, "FRL Mode Pixel Clk: %luHz\n",
+			   hdmi->hdmi_data.video_mode.mpixelclock);
+	} else {
+		if (hdmi->hdmi_data.video_mode.mtmdsclock > 340000000)
+			val = hdmi->hdmi_data.video_mode.mtmdsclock / 4;
+		else
+			val = hdmi->hdmi_data.video_mode.mtmdsclock;
+		seq_printf(s, "TMDS Mode Pixel Clk: %luHz\t\tTMDS Clk: %uHz\n",
+			   hdmi->hdmi_data.video_mode.mpixelclock, val);
+	}
+
+	seq_puts(s, "Color Format: ");
+	if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format))
+		seq_puts(s, "RGB");
+	else if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format))
+		seq_puts(s, "YUV444");
+	else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format))
+		seq_puts(s, "YUV422");
+	else if (hdmi_bus_fmt_is_yuv420(hdmi->hdmi_data.enc_out_bus_format))
+		seq_puts(s, "YUV420");
+	else
+		seq_puts(s, "UNKNOWN");
+	val =  hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format);
+	seq_printf(s, "\t\tColor Depth: %d bit\n", val);
+	seq_puts(s, "Colorimetry: ");
+	switch (hdmi->hdmi_data.enc_out_encoding) {
+	case V4L2_YCBCR_ENC_601:
+		seq_puts(s, "ITU.BT601");
+		break;
+	case V4L2_YCBCR_ENC_709:
+		seq_puts(s, "ITU.BT709");
+		break;
+	case V4L2_YCBCR_ENC_BT2020:
+		seq_puts(s, "ITU.BT2020");
+		break;
+	default: /* Carries no data */
+		seq_puts(s, "ITU.BT601");
+		break;
+	}
+
+	seq_puts(s, "\t\tEOTF: ");
+
+	val = hdmi_readl(hdmi, PKTSCHED_PKT_EN);
+	if (!(val & PKTSCHED_DRMI_TX_EN)) {
+		seq_puts(s, "Off\n");
+		return 0;
+	}
+
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS1);
+	val = (val >> 8) & 0x7;
+	switch (val) {
+	case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
+		seq_puts(s, "SDR");
+		break;
+	case HDMI_EOTF_TRADITIONAL_GAMMA_HDR:
+		seq_puts(s, "HDR");
+		break;
+	case HDMI_EOTF_SMPTE_ST2084:
+		seq_puts(s, "ST2084");
+		break;
+	case HDMI_EOTF_BT_2100_HLG:
+		seq_puts(s, "HLG");
+		break;
+	default:
+		seq_puts(s, "Not Defined\n");
+		return 0;
+	}
+
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS1);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "\nx0: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS2);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\t\ty0: %d\n", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS2);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "x1: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS3);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\t\ty1: %d\n", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS3);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "x2: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS4);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\t\ty2: %d\n", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS4);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "white x: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS5);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\twhite y: %d\n", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS5);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "max lum: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS6);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\tmin lum: %d\n", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS6);
+	val = (val >> 16) & 0xffff;
+	seq_printf(s, "max cll: %d", val);
+	val = hdmi_readl(hdmi, PKT_DRMI_CONTENTS7);
+	val = val & 0xffff;
+	seq_printf(s, "\t\t\tmax fall: %d\n", val);
+	return 0;
+}
+
+static int dw_hdmi_status_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dw_hdmi_status_show, inode->i_private);
+}
+
+static const struct file_operations dw_hdmi_status_fops = {
+	.owner = THIS_MODULE,
+	.open = dw_hdmi_status_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static void dw_hdmi_register_debugfs(struct device *dev, struct dw_hdmi_qp *hdmi)
+{
+	u8 buf[11];
+
+	snprintf(buf, sizeof(buf), "dw-hdmi%d", hdmi->plat_data->id);
+	hdmi->debugfs_dir = debugfs_create_dir(buf, NULL);
+	if (IS_ERR(hdmi->debugfs_dir)) {
+		dev_err(dev, "failed to create debugfs dir!\n");
+		return;
+	}
+
+	debugfs_create_file("status", 0400, hdmi->debugfs_dir,
+			    hdmi, &dw_hdmi_status_fops);
+	debugfs_create_file("ctrl", 0600, hdmi->debugfs_dir,
+			    hdmi, &dw_hdmi_ctrl_fops);
+}
+
+static struct dw_hdmi_qp *
+__dw_hdmi_probe(struct platform_device *pdev,
+		const struct dw_hdmi_plat_data *plat_data)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *ddc_node;
+	struct dw_hdmi_qp *hdmi;
+	struct resource *iores = NULL;
+	int irq;
+	int ret;
+
+	hdmi = devm_kzalloc(dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return ERR_PTR(-ENOMEM);
+
+	hdmi->connector.stereo_allowed = 1;
+	hdmi->plat_data = plat_data;
+	hdmi->dev = dev;
+	hdmi->disabled = true;
+
+	mutex_init(&hdmi->mutex);
+
+	ddc_node = of_parse_phandle(np, "ddc-i2c-bus", 0);
+	if (ddc_node) {
+		hdmi->ddc = of_get_i2c_adapter_by_node(ddc_node);
+		of_node_put(ddc_node);
+		if (!hdmi->ddc) {
+			dev_dbg(hdmi->dev, "failed to read ddc node\n");
+			return ERR_PTR(-EPROBE_DEFER);
+		}
+
+	} else {
+		dev_dbg(hdmi->dev, "no ddc property found\n");
+	}
+
+	if (!plat_data->regm) {
+		const struct regmap_config *reg_config;
+
+		reg_config = &hdmi_regmap_config;
+
+		iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		hdmi->regs = devm_ioremap_resource(dev, iores);
+		if (IS_ERR(hdmi->regs)) {
+			ret = PTR_ERR(hdmi->regs);
+			goto err_res;
+		}
+
+		hdmi->regm = devm_regmap_init_mmio(dev, hdmi->regs, reg_config);
+		if (IS_ERR(hdmi->regm)) {
+			dev_err(dev, "Failed to configure regmap\n");
+			ret = PTR_ERR(hdmi->regm);
+			goto err_res;
+		}
+	} else {
+		hdmi->regm = plat_data->regm;
+	}
+
+	ret = dw_hdmi_detect_phy(hdmi);
+	if (ret < 0)
+		goto err_res;
+
+	hdmi_writel(hdmi, 0, MAINUNIT_0_INT_MASK_N);
+	hdmi_writel(hdmi, 0, MAINUNIT_1_INT_MASK_N);
+	hdmi_writel(hdmi, 428571429, TIMER_BASE_CONFIG0);
+	if ((hdmi_readl(hdmi, CMU_STATUS) & DISPLAY_CLK_MONITOR) == DISPLAY_CLK_LOCKED) {
+		hdmi->initialized = true;
+		hdmi->disabled = false;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto err_res;
+	}
+
+	hdmi->avp_irq = irq;
+	ret = devm_request_threaded_irq(dev, hdmi->avp_irq,
+					dw_hdmi_qp_avp_hardirq,
+					dw_hdmi_qp_avp_irq, IRQF_SHARED,
+					dev_name(dev), hdmi);
+	if (ret)
+		goto err_res;
+
+	irq = platform_get_irq(pdev, 1);
+	if (irq < 0) {
+		ret = irq;
+		goto err_res;
+	}
+
+	irq = platform_get_irq(pdev, 2);
+	if (irq < 0) {
+		ret = irq;
+		goto err_res;
+	}
+
+	hdmi->earc_irq = irq;
+	ret = devm_request_threaded_irq(dev, hdmi->earc_irq,
+					dw_hdmi_qp_earc_hardirq,
+					dw_hdmi_qp_earc_irq, IRQF_SHARED,
+					dev_name(dev), hdmi);
+	if (ret)
+		goto err_res;
+
+	irq = platform_get_irq(pdev, 3);
+	if (irq < 0) {
+		ret = irq;
+		goto err_res;
+	}
+
+	hdmi->main_irq = irq;
+	ret = devm_request_threaded_irq(dev, hdmi->main_irq,
+					dw_hdmi_qp_main_hardirq, NULL,
+					IRQF_SHARED, dev_name(dev), hdmi);
+	if (ret)
+		goto err_res;
+
+	/* If DDC bus is not specified, try to register HDMI I2C bus */
+	if (!hdmi->ddc) {
+		hdmi->ddc = dw_hdmi_i2c_adapter(hdmi);
+		if (IS_ERR(hdmi->ddc))
+			hdmi->ddc = NULL;
+		/*
+		 * Read high and low time from device tree. If not available use
+		 * the default timing scl clock rate is about 99.6KHz.
+		 */
+		if (of_property_read_u32(np, "ddc-i2c-scl-high-time-ns",
+					 &hdmi->i2c->scl_high_ns))
+			hdmi->i2c->scl_high_ns = 4708;
+		if (of_property_read_u32(np, "ddc-i2c-scl-low-time-ns",
+					 &hdmi->i2c->scl_low_ns))
+			hdmi->i2c->scl_low_ns = 4916;
+	}
+
+	hdmi->bridge.driver_private = hdmi;
+	hdmi->bridge.funcs = &dw_hdmi_bridge_funcs;
+#ifdef CONFIG_OF
+	hdmi->bridge.of_node = pdev->dev.of_node;
+#endif
+
+	if (hdmi->phy.ops->setup_hpd)
+		hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data);
+
+	hdmi->connector.ycbcr_420_allowed = hdmi->plat_data->ycbcr_420_allowed;
+
+	hdmi->extcon = devm_extcon_dev_allocate(hdmi->dev, dw_hdmi_cable);
+	if (IS_ERR(hdmi->extcon)) {
+		dev_err(hdmi->dev, "allocate extcon failed\n");
+		ret = PTR_ERR(hdmi->extcon);
+		goto err_res;
+	}
+
+	ret = devm_extcon_dev_register(hdmi->dev, hdmi->extcon);
+	if (ret) {
+		dev_err(hdmi->dev, "failed to register extcon: %d\n", ret);
+		goto err_res;
+	}
+
+	ret = extcon_set_property_capability(hdmi->extcon, EXTCON_DISP_HDMI,
+					     EXTCON_PROP_DISP_HPD);
+	if (ret) {
+		dev_err(hdmi->dev,
+			"failed to set USB property capability: %d\n", ret);
+		goto err_res;
+	}
+
+	/* Reset HDMI DDC I2C master controller and mute I2CM interrupts */
+	if (hdmi->i2c)
+		dw_hdmi_i2c_init(hdmi);
+
+	init_completion(&hdmi->flt_cmp);
+	init_completion(&hdmi->earc_cmp);
+
+	if (of_property_read_bool(np, "scramble-low-rates"))
+		hdmi->scramble_low_rates = true;
+
+	dw_hdmi_register_debugfs(dev, hdmi);
+
+	return hdmi;
+
+err_res:
+	if (hdmi->i2c)
+		i2c_del_adapter(&hdmi->i2c->adap);
+	else
+		i2c_put_adapter(hdmi->ddc);
+
+	return ERR_PTR(ret);
+}
+
+static void __dw_hdmi_remove(struct dw_hdmi_qp *hdmi)
+{
+	if (hdmi->avp_irq)
+		disable_irq(hdmi->avp_irq);
+
+	if (hdmi->main_irq)
+		disable_irq(hdmi->main_irq);
+
+	if (hdmi->earc_irq)
+		disable_irq(hdmi->earc_irq);
+
+	debugfs_remove_recursive(hdmi->debugfs_dir);
+
+	if (!hdmi->plat_data->first_screen) {
+		dw_hdmi_destroy_properties(hdmi);
+		hdmi->connector.funcs->destroy(&hdmi->connector);
+	}
+
+	if (hdmi->audio && !IS_ERR(hdmi->audio))
+		platform_device_unregister(hdmi->audio);
+
+	// [CC:] dw_hdmi_rockchip_unbind() also calls drm_encoder_cleanup()
+	// and causes a seg fault due to NULL ptr dererence
+	// if (hdmi->bridge.encoder && !hdmi->plat_data->first_screen)
+	// 	hdmi->bridge.encoder->funcs->destroy(hdmi->bridge.encoder);
+	//
+	if (!IS_ERR(hdmi->cec))
+		platform_device_unregister(hdmi->cec);
+	if (hdmi->i2c)
+		i2c_del_adapter(&hdmi->i2c->adap);
+	else
+		i2c_put_adapter(hdmi->ddc);
+}
+
+/* -----------------------------------------------------------------------------
+ * Bind/unbind API, used from platforms based on the component framework.
+ */
+struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev,
+				   struct drm_encoder *encoder,
+				   struct dw_hdmi_plat_data *plat_data)
+{
+	struct dw_hdmi_qp *hdmi;
+	int ret;
+
+	hdmi = __dw_hdmi_probe(pdev, plat_data);
+	if (IS_ERR(hdmi))
+		return hdmi;
+
+	if (!plat_data->first_screen) {
+		ret = drm_bridge_attach(encoder, &hdmi->bridge, NULL, 0);
+		if (ret) {
+			__dw_hdmi_remove(hdmi);
+			dev_err(hdmi->dev, "Failed to initialize bridge with drm\n");
+			return ERR_PTR(ret);
+		}
+
+		plat_data->connector = &hdmi->connector;
+	}
+
+	if (plat_data->split_mode && !hdmi->plat_data->first_screen) {
+		struct dw_hdmi_qp *secondary = NULL;
+
+		if (hdmi->plat_data->left)
+			secondary = hdmi->plat_data->left;
+		else if (hdmi->plat_data->right)
+			secondary = hdmi->plat_data->right;
+
+		if (!secondary)
+			return ERR_PTR(-ENOMEM);
+		ret = drm_bridge_attach(encoder, &secondary->bridge, &hdmi->bridge,
+					DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+		if (ret)
+			return ERR_PTR(ret);
+	}
+
+	return hdmi;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_qp_bind);
+
+void dw_hdmi_qp_unbind(struct dw_hdmi_qp *hdmi)
+{
+	__dw_hdmi_remove(hdmi);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_qp_unbind);
+
+void dw_hdmi_qp_suspend(struct device *dev, struct dw_hdmi_qp *hdmi)
+{
+	if (!hdmi) {
+		dev_warn(dev, "Hdmi has not been initialized\n");
+		return;
+	}
+
+	mutex_lock(&hdmi->mutex);
+
+	/*
+	 * When system shutdown, hdmi should be disabled.
+	 * When system suspend, dw_hdmi_qp_bridge_disable will disable hdmi first.
+	 * To prevent duplicate operation, we should determine whether hdmi
+	 * has been disabled.
+	 */
+	if (!hdmi->disabled)
+		hdmi->disabled = true;
+	mutex_unlock(&hdmi->mutex);
+
+	if (hdmi->avp_irq)
+		disable_irq(hdmi->avp_irq);
+
+	if (hdmi->main_irq)
+		disable_irq(hdmi->main_irq);
+
+	if (hdmi->earc_irq)
+		disable_irq(hdmi->earc_irq);
+
+	pinctrl_pm_select_sleep_state(dev);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_qp_suspend);
+
+void dw_hdmi_qp_resume(struct device *dev, struct dw_hdmi_qp *hdmi)
+{
+	if (!hdmi) {
+		dev_warn(dev, "Hdmi has not been initialized\n");
+		return;
+	}
+
+	hdmi_writel(hdmi, 0, MAINUNIT_0_INT_MASK_N);
+	hdmi_writel(hdmi, 0, MAINUNIT_1_INT_MASK_N);
+	hdmi_writel(hdmi, 428571429, TIMER_BASE_CONFIG0);
+
+	pinctrl_pm_select_default_state(dev);
+
+	mutex_lock(&hdmi->mutex);
+	if (hdmi->i2c)
+		dw_hdmi_i2c_init(hdmi);
+	if (hdmi->avp_irq)
+		enable_irq(hdmi->avp_irq);
+
+	if (hdmi->main_irq)
+		enable_irq(hdmi->main_irq);
+
+	if (hdmi->earc_irq)
+		enable_irq(hdmi->earc_irq);
+
+	mutex_unlock(&hdmi->mutex);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_qp_resume);
+
+MODULE_AUTHOR("Algea Cao <algea.cao@rock-chips.com>");
+MODULE_DESCRIPTION("DW HDMI QP transmitter driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:dw-hdmi-qp");
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h
new file mode 100644
index 000000000000..4cac70f2d11d
--- /dev/null
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h
@@ -0,0 +1,831 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Rockchip Electronics Co.Ltd
+ * Author:
+ *      Algea Cao <algea.cao@rock-chips.com>
+ */
+#ifndef __DW_HDMI_QP_H__
+#define __DW_HDMI_QP_H__
+/* Main Unit Registers */
+#define CORE_ID						0x0
+#define VER_NUMBER					0x4
+#define VER_TYPE					0x8
+#define CONFIG_REG					0xc
+#define CONFIG_CEC					BIT(28)
+#define CONFIG_AUD_UD					BIT(23)
+#define CORE_TIMESTAMP_HHMM				0x14
+#define CORE_TIMESTAMP_MMDD				0x18
+#define CORE_TIMESTAMP_YYYY				0x1c
+/* Reset Manager Registers */
+#define GLOBAL_SWRESET_REQUEST				0x40
+#define EARCRX_CMDC_SWINIT_P				BIT(27)
+#define AVP_DATAPATH_PACKET_AUDIO_SWINIT_P		BIT(10)
+#define GLOBAL_SWDISABLE				0x44
+#define CEC_SWDISABLE					BIT(17)
+#define AVP_DATAPATH_PACKET_AUDIO_SWDISABLE		BIT(10)
+#define AVP_DATAPATH_VIDEO_SWDISABLE			BIT(6)
+#define RESET_MANAGER_CONFIG0				0x48
+#define RESET_MANAGER_STATUS0				0x50
+#define RESET_MANAGER_STATUS1				0x54
+#define RESET_MANAGER_STATUS2				0x58
+/* Timer Base Registers */
+#define TIMER_BASE_CONFIG0				0x80
+#define TIMER_BASE_STATUS0				0x84
+/* CMU Registers */
+#define CMU_CONFIG0					0xa0
+#define CMU_CONFIG1					0xa4
+#define CMU_CONFIG2					0xa8
+#define CMU_CONFIG3					0xac
+#define CMU_STATUS					0xb0
+#define DISPLAY_CLK_MONITOR				0x3f
+#define DISPLAY_CLK_LOCKED				0X15
+#define EARC_BPCLK_OFF					BIT(9)
+#define AUDCLK_OFF					BIT(7)
+#define LINKQPCLK_OFF					BIT(5)
+#define VIDQPCLK_OFF					BIT(3)
+#define IPI_CLK_OFF					BIT(1)
+#define CMU_IPI_CLK_FREQ				0xb4
+#define CMU_VIDQPCLK_FREQ				0xb8
+#define CMU_LINKQPCLK_FREQ				0xbc
+#define CMU_AUDQPCLK_FREQ				0xc0
+#define CMU_EARC_BPCLK_FREQ				0xc4
+/* I2CM Registers */
+#define I2CM_SM_SCL_CONFIG0				0xe0
+#define I2CM_FM_SCL_CONFIG0				0xe4
+#define I2CM_CONFIG0					0xe8
+#define I2CM_CONTROL0					0xec
+#define I2CM_STATUS0					0xf0
+#define I2CM_INTERFACE_CONTROL0				0xf4
+#define I2CM_ADDR					0xff000
+#define I2CM_SLVADDR					0xfe0
+#define I2CM_WR_MASK					0x1e
+#define I2CM_EXT_READ					BIT(4)
+#define I2CM_SHORT_READ					BIT(3)
+#define I2CM_FM_READ					BIT(2)
+#define I2CM_FM_WRITE					BIT(1)
+#define I2CM_FM_EN					BIT(0)
+#define I2CM_INTERFACE_CONTROL1				0xf8
+#define I2CM_SEG_PTR					0x7f80
+#define I2CM_SEG_ADDR					0x7f
+#define I2CM_INTERFACE_WRDATA_0_3			0xfc
+#define I2CM_INTERFACE_WRDATA_4_7			0x100
+#define I2CM_INTERFACE_WRDATA_8_11			0x104
+#define I2CM_INTERFACE_WRDATA_12_15			0x108
+#define I2CM_INTERFACE_RDDATA_0_3			0x10c
+#define I2CM_INTERFACE_RDDATA_4_7			0x110
+#define I2CM_INTERFACE_RDDATA_8_11			0x114
+#define I2CM_INTERFACE_RDDATA_12_15			0x118
+/* SCDC Registers */
+#define SCDC_CONFIG0					0x140
+#define SCDC_I2C_FM_EN					BIT(12)
+#define SCDC_UPD_FLAGS_AUTO_CLR				BIT(6)
+#define SCDC_UPD_FLAGS_POLL_EN				BIT(4)
+#define SCDC_CONTROL0					0x148
+#define SCDC_STATUS0					0x150
+#define STATUS_UPDATE					BIT(0)
+#define FRL_START					BIT(4)
+#define FLT_UPDATE					BIT(5)
+/* FLT Registers */
+#define FLT_CONFIG0					0x160
+#define FLT_CONFIG1					0x164
+#define FLT_CONFIG2					0x168
+#define FLT_CONTROL0					0x170
+/*  Main Unit 2 Registers */
+#define MAINUNIT_STATUS0				0x180
+/* Video Interface Registers */
+#define VIDEO_INTERFACE_CONFIG0				0x800
+#define VIDEO_INTERFACE_CONFIG1				0x804
+#define VIDEO_INTERFACE_CONFIG2				0x808
+#define VIDEO_INTERFACE_CONTROL0			0x80c
+#define VIDEO_INTERFACE_STATUS0				0x814
+/* Video Packing Registers */
+#define VIDEO_PACKING_CONFIG0				0x81c
+/* Audio Interface Registers */
+#define AUDIO_INTERFACE_CONFIG0				0x820
+#define AUD_IF_SEL_MSK					0x3
+#define AUD_IF_SPDIF					0x2
+#define AUD_IF_I2S					0x1
+#define AUD_IF_PAI					0x0
+#define AUD_FIFO_INIT_ON_OVF_MSK			BIT(2)
+#define AUD_FIFO_INIT_ON_OVF_EN				BIT(2)
+#define I2S_LINES_EN_MSK				GENMASK(7, 4)
+#define I2S_LINES_EN(x)					BIT(x + 4)
+#define I2S_BPCUV_RCV_MSK				BIT(12)
+#define I2S_BPCUV_RCV_EN				BIT(12)
+#define I2S_BPCUV_RCV_DIS				0
+#define SPDIF_LINES_EN					GENMASK(19, 16)
+#define AUD_FORMAT_MSK					GENMASK(26, 24)
+#define AUD_3DOBA					(0x7 << 24)
+#define AUD_3DASP					(0x6 << 24)
+#define AUD_MSOBA					(0x5 << 24)
+#define AUD_MSASP					(0x4 << 24)
+#define AUD_HBR						(0x3 << 24)
+#define AUD_DST						(0x2 << 24)
+#define AUD_OBA						(0x1 << 24)
+#define AUD_ASP						(0x0 << 24)
+#define AUDIO_INTERFACE_CONFIG1				0x824
+#define AUDIO_INTERFACE_CONTROL0			0x82c
+#define AUDIO_FIFO_CLR_P				BIT(0)
+#define AUDIO_INTERFACE_STATUS0				0x834
+/* Frame Composer Registers */
+#define FRAME_COMPOSER_CONFIG0				0x840
+#define FRAME_COMPOSER_CONFIG1				0x844
+#define FRAME_COMPOSER_CONFIG2				0x848
+#define FRAME_COMPOSER_CONFIG3				0x84c
+#define FRAME_COMPOSER_CONFIG4				0x850
+#define FRAME_COMPOSER_CONFIG5				0x854
+#define FRAME_COMPOSER_CONFIG6				0x858
+#define FRAME_COMPOSER_CONFIG7				0x85c
+#define FRAME_COMPOSER_CONFIG8				0x860
+#define FRAME_COMPOSER_CONFIG9				0x864
+#define FRAME_COMPOSER_CONTROL0				0x86c
+/* Video Monitor Registers */
+#define VIDEO_MONITOR_CONFIG0				0x880
+#define VIDEO_MONITOR_STATUS0				0x884
+#define VIDEO_MONITOR_STATUS1				0x888
+#define VIDEO_MONITOR_STATUS2				0x88c
+#define VIDEO_MONITOR_STATUS3				0x890
+#define VIDEO_MONITOR_STATUS4				0x894
+#define VIDEO_MONITOR_STATUS5				0x898
+#define VIDEO_MONITOR_STATUS6				0x89c
+/* HDCP2 Logic Registers */
+#define HDCP2LOGIC_CONFIG0				0x8e0
+#define HDCP2_BYPASS					BIT(0)
+#define HDCP2LOGIC_ESM_GPIO_IN				0x8e4
+#define HDCP2LOGIC_ESM_GPIO_OUT				0x8e8
+/* HDCP14 Registers */
+#define HDCP14_CONFIG0					0x900
+#define HDCP14_CONFIG1					0x904
+#define HDCP14_CONFIG2					0x908
+#define HDCP14_CONFIG3					0x90c
+#define HDCP14_KEY_SEED					0x914
+#define HDCP14_KEY_H					0x918
+#define HDCP14_KEY_L					0x91c
+#define HDCP14_KEY_STATUS				0x920
+#define HDCP14_AKSV_H					0x924
+#define HDCP14_AKSV_L					0x928
+#define HDCP14_AN_H					0x92c
+#define HDCP14_AN_L					0x930
+#define HDCP14_STATUS0					0x934
+#define HDCP14_STATUS1					0x938
+/* Scrambler Registers */
+#define SCRAMB_CONFIG0					0x960
+/* Video Configuration Registers */
+#define LINK_CONFIG0					0x968
+#define OPMODE_FRL_4LANES				BIT(8)
+#define OPMODE_DVI					BIT(4)
+#define OPMODE_FRL					BIT(0)
+/* TMDS FIFO Registers */
+#define TMDS_FIFO_CONFIG0				0x970
+#define TMDS_FIFO_CONTROL0				0x974
+/* FRL RSFEC Registers */
+#define FRL_RSFEC_CONFIG0				0xa20
+#define FRL_RSFEC_STATUS0				0xa30
+/* FRL Packetizer Registers */
+#define FRL_PKTZ_CONFIG0				0xa40
+#define FRL_PKTZ_CONTROL0				0xa44
+#define FRL_PKTZ_CONTROL1				0xa50
+#define FRL_PKTZ_STATUS1				0xa54
+/* Packet Scheduler Registers */
+#define PKTSCHED_CONFIG0				0xa80
+#define PKTSCHED_PRQUEUE0_CONFIG0			0xa84
+#define PKTSCHED_PRQUEUE1_CONFIG0			0xa88
+#define PKTSCHED_PRQUEUE2_CONFIG0			0xa8c
+#define PKTSCHED_PRQUEUE2_CONFIG1			0xa90
+#define PKTSCHED_PRQUEUE2_CONFIG2			0xa94
+#define PKTSCHED_PKT_CONFIG0				0xa98
+#define PKTSCHED_PKT_CONFIG1				0xa9c
+#define PKTSCHED_DRMI_FIELDRATE				BIT(13)
+#define PKTSCHED_AVI_FIELDRATE				BIT(12)
+#define PKTSCHED_PKT_CONFIG2				0xaa0
+#define PKTSCHED_PKT_CONFIG3				0xaa4
+#define PKTSCHED_PKT_EN					0xaa8
+#define PKTSCHED_DRMI_TX_EN				BIT(17)
+#define PKTSCHED_AUDI_TX_EN				BIT(15)
+#define PKTSCHED_AVI_TX_EN				BIT(13)
+#define PKTSCHED_EMP_CVTEM_TX_EN			BIT(10)
+#define PKTSCHED_AMD_TX_EN				BIT(8)
+#define PKTSCHED_GCP_TX_EN				BIT(3)
+#define PKTSCHED_AUDS_TX_EN				BIT(2)
+#define PKTSCHED_ACR_TX_EN				BIT(1)
+#define PKTSCHED_NULL_TX_EN				BIT(0)
+#define PKTSCHED_PKT_CONTROL0				0xaac
+#define PKTSCHED_PKT_SEND				0xab0
+#define PKTSCHED_PKT_STATUS0				0xab4
+#define PKTSCHED_PKT_STATUS1				0xab8
+#define PKT_NULL_CONTENTS0				0xb00
+#define PKT_NULL_CONTENTS1				0xb04
+#define PKT_NULL_CONTENTS2				0xb08
+#define PKT_NULL_CONTENTS3				0xb0c
+#define PKT_NULL_CONTENTS4				0xb10
+#define PKT_NULL_CONTENTS5				0xb14
+#define PKT_NULL_CONTENTS6				0xb18
+#define PKT_NULL_CONTENTS7				0xb1c
+#define PKT_ACP_CONTENTS0				0xb20
+#define PKT_ACP_CONTENTS1				0xb24
+#define PKT_ACP_CONTENTS2				0xb28
+#define PKT_ACP_CONTENTS3				0xb2c
+#define PKT_ACP_CONTENTS4				0xb30
+#define PKT_ACP_CONTENTS5				0xb34
+#define PKT_ACP_CONTENTS6				0xb38
+#define PKT_ACP_CONTENTS7				0xb3c
+#define PKT_ISRC1_CONTENTS0				0xb40
+#define PKT_ISRC1_CONTENTS1				0xb44
+#define PKT_ISRC1_CONTENTS2				0xb48
+#define PKT_ISRC1_CONTENTS3				0xb4c
+#define PKT_ISRC1_CONTENTS4				0xb50
+#define PKT_ISRC1_CONTENTS5				0xb54
+#define PKT_ISRC1_CONTENTS6				0xb58
+#define PKT_ISRC1_CONTENTS7				0xb5c
+#define PKT_ISRC2_CONTENTS0				0xb60
+#define PKT_ISRC2_CONTENTS1				0xb64
+#define PKT_ISRC2_CONTENTS2				0xb68
+#define PKT_ISRC2_CONTENTS3				0xb6c
+#define PKT_ISRC2_CONTENTS4				0xb70
+#define PKT_ISRC2_CONTENTS5				0xb74
+#define PKT_ISRC2_CONTENTS6				0xb78
+#define PKT_ISRC2_CONTENTS7				0xb7c
+#define PKT_GMD_CONTENTS0				0xb80
+#define PKT_GMD_CONTENTS1				0xb84
+#define PKT_GMD_CONTENTS2				0xb88
+#define PKT_GMD_CONTENTS3				0xb8c
+#define PKT_GMD_CONTENTS4				0xb90
+#define PKT_GMD_CONTENTS5				0xb94
+#define PKT_GMD_CONTENTS6				0xb98
+#define PKT_GMD_CONTENTS7				0xb9c
+#define PKT_AMD_CONTENTS0				0xba0
+#define PKT_AMD_CONTENTS1				0xba4
+#define PKT_AMD_CONTENTS2				0xba8
+#define PKT_AMD_CONTENTS3				0xbac
+#define PKT_AMD_CONTENTS4				0xbb0
+#define PKT_AMD_CONTENTS5				0xbb4
+#define PKT_AMD_CONTENTS6				0xbb8
+#define PKT_AMD_CONTENTS7				0xbbc
+#define PKT_VSI_CONTENTS0				0xbc0
+#define PKT_VSI_CONTENTS1				0xbc4
+#define PKT_VSI_CONTENTS2				0xbc8
+#define PKT_VSI_CONTENTS3				0xbcc
+#define PKT_VSI_CONTENTS4				0xbd0
+#define PKT_VSI_CONTENTS5				0xbd4
+#define PKT_VSI_CONTENTS6				0xbd8
+#define PKT_VSI_CONTENTS7				0xbdc
+#define PKT_AVI_CONTENTS0				0xbe0
+#define HDMI_FC_AVICONF0_ACTIVE_FMT_INFO_PRESENT	BIT(4)
+#define HDMI_FC_AVICONF0_BAR_DATA_VERT_BAR		0x04
+#define HDMI_FC_AVICONF0_BAR_DATA_HORIZ_BAR		0x08
+#define HDMI_FC_AVICONF2_IT_CONTENT_VALID		0x80
+#define PKT_AVI_CONTENTS1				0xbe4
+#define PKT_AVI_CONTENTS2				0xbe8
+#define PKT_AVI_CONTENTS3				0xbec
+#define PKT_AVI_CONTENTS4				0xbf0
+#define PKT_AVI_CONTENTS5				0xbf4
+#define PKT_AVI_CONTENTS6				0xbf8
+#define PKT_AVI_CONTENTS7				0xbfc
+#define PKT_SPDI_CONTENTS0				0xc00
+#define PKT_SPDI_CONTENTS1				0xc04
+#define PKT_SPDI_CONTENTS2				0xc08
+#define PKT_SPDI_CONTENTS3				0xc0c
+#define PKT_SPDI_CONTENTS4				0xc10
+#define PKT_SPDI_CONTENTS5				0xc14
+#define PKT_SPDI_CONTENTS6				0xc18
+#define PKT_SPDI_CONTENTS7				0xc1c
+#define PKT_AUDI_CONTENTS0				0xc20
+#define PKT_AUDI_CONTENTS1				0xc24
+#define PKT_AUDI_CONTENTS2				0xc28
+#define PKT_AUDI_CONTENTS3				0xc2c
+#define PKT_AUDI_CONTENTS4				0xc30
+#define PKT_AUDI_CONTENTS5				0xc34
+#define PKT_AUDI_CONTENTS6				0xc38
+#define PKT_AUDI_CONTENTS7				0xc3c
+#define PKT_NVI_CONTENTS0				0xc40
+#define PKT_NVI_CONTENTS1				0xc44
+#define PKT_NVI_CONTENTS2				0xc48
+#define PKT_NVI_CONTENTS3				0xc4c
+#define PKT_NVI_CONTENTS4				0xc50
+#define PKT_NVI_CONTENTS5				0xc54
+#define PKT_NVI_CONTENTS6				0xc58
+#define PKT_NVI_CONTENTS7				0xc5c
+#define PKT_DRMI_CONTENTS0				0xc60
+#define PKT_DRMI_CONTENTS1				0xc64
+#define PKT_DRMI_CONTENTS2				0xc68
+#define PKT_DRMI_CONTENTS3				0xc6c
+#define PKT_DRMI_CONTENTS4				0xc70
+#define PKT_DRMI_CONTENTS5				0xc74
+#define PKT_DRMI_CONTENTS6				0xc78
+#define PKT_DRMI_CONTENTS7				0xc7c
+#define PKT_GHDMI1_CONTENTS0				0xc80
+#define PKT_GHDMI1_CONTENTS1				0xc84
+#define PKT_GHDMI1_CONTENTS2				0xc88
+#define PKT_GHDMI1_CONTENTS3				0xc8c
+#define PKT_GHDMI1_CONTENTS4				0xc90
+#define PKT_GHDMI1_CONTENTS5				0xc94
+#define PKT_GHDMI1_CONTENTS6				0xc98
+#define PKT_GHDMI1_CONTENTS7				0xc9c
+#define PKT_GHDMI2_CONTENTS0				0xca0
+#define PKT_GHDMI2_CONTENTS1				0xca4
+#define PKT_GHDMI2_CONTENTS2				0xca8
+#define PKT_GHDMI2_CONTENTS3				0xcac
+#define PKT_GHDMI2_CONTENTS4				0xcb0
+#define PKT_GHDMI2_CONTENTS5				0xcb4
+#define PKT_GHDMI2_CONTENTS6				0xcb8
+#define PKT_GHDMI2_CONTENTS7				0xcbc
+/* EMP Packetizer Registers */
+#define PKT_EMP_CONFIG0					0xce0
+#define PKT_EMP_CONTROL0				0xcec
+#define PKT_EMP_CONTROL1				0xcf0
+#define PKT_EMP_CONTROL2				0xcf4
+#define PKT_EMP_VTEM_CONTENTS0				0xd00
+#define PKT_EMP_VTEM_CONTENTS1				0xd04
+#define PKT_EMP_VTEM_CONTENTS2				0xd08
+#define PKT_EMP_VTEM_CONTENTS3				0xd0c
+#define PKT_EMP_VTEM_CONTENTS4				0xd10
+#define PKT_EMP_VTEM_CONTENTS5				0xd14
+#define PKT_EMP_VTEM_CONTENTS6				0xd18
+#define PKT_EMP_VTEM_CONTENTS7				0xd1c
+#define PKT0_EMP_CVTEM_CONTENTS0			0xd20
+#define PKT0_EMP_CVTEM_CONTENTS1			0xd24
+#define PKT0_EMP_CVTEM_CONTENTS2			0xd28
+#define PKT0_EMP_CVTEM_CONTENTS3			0xd2c
+#define PKT0_EMP_CVTEM_CONTENTS4			0xd30
+#define PKT0_EMP_CVTEM_CONTENTS5			0xd34
+#define PKT0_EMP_CVTEM_CONTENTS6			0xd38
+#define PKT0_EMP_CVTEM_CONTENTS7			0xd3c
+#define PKT1_EMP_CVTEM_CONTENTS0			0xd40
+#define PKT1_EMP_CVTEM_CONTENTS1			0xd44
+#define PKT1_EMP_CVTEM_CONTENTS2			0xd48
+#define PKT1_EMP_CVTEM_CONTENTS3			0xd4c
+#define PKT1_EMP_CVTEM_CONTENTS4			0xd50
+#define PKT1_EMP_CVTEM_CONTENTS5			0xd54
+#define PKT1_EMP_CVTEM_CONTENTS6			0xd58
+#define PKT1_EMP_CVTEM_CONTENTS7			0xd5c
+#define PKT2_EMP_CVTEM_CONTENTS0			0xd60
+#define PKT2_EMP_CVTEM_CONTENTS1			0xd64
+#define PKT2_EMP_CVTEM_CONTENTS2			0xd68
+#define PKT2_EMP_CVTEM_CONTENTS3			0xd6c
+#define PKT2_EMP_CVTEM_CONTENTS4			0xd70
+#define PKT2_EMP_CVTEM_CONTENTS5			0xd74
+#define PKT2_EMP_CVTEM_CONTENTS6			0xd78
+#define PKT2_EMP_CVTEM_CONTENTS7			0xd7c
+#define PKT3_EMP_CVTEM_CONTENTS0			0xd80
+#define PKT3_EMP_CVTEM_CONTENTS1			0xd84
+#define PKT3_EMP_CVTEM_CONTENTS2			0xd88
+#define PKT3_EMP_CVTEM_CONTENTS3			0xd8c
+#define PKT3_EMP_CVTEM_CONTENTS4			0xd90
+#define PKT3_EMP_CVTEM_CONTENTS5			0xd94
+#define PKT3_EMP_CVTEM_CONTENTS6			0xd98
+#define PKT3_EMP_CVTEM_CONTENTS7			0xd9c
+#define PKT4_EMP_CVTEM_CONTENTS0			0xda0
+#define PKT4_EMP_CVTEM_CONTENTS1			0xda4
+#define PKT4_EMP_CVTEM_CONTENTS2			0xda8
+#define PKT4_EMP_CVTEM_CONTENTS3			0xdac
+#define PKT4_EMP_CVTEM_CONTENTS4			0xdb0
+#define PKT4_EMP_CVTEM_CONTENTS5			0xdb4
+#define PKT4_EMP_CVTEM_CONTENTS6			0xdb8
+#define PKT4_EMP_CVTEM_CONTENTS7			0xdbc
+#define PKT5_EMP_CVTEM_CONTENTS0			0xdc0
+#define PKT5_EMP_CVTEM_CONTENTS1			0xdc4
+#define PKT5_EMP_CVTEM_CONTENTS2			0xdc8
+#define PKT5_EMP_CVTEM_CONTENTS3			0xdcc
+#define PKT5_EMP_CVTEM_CONTENTS4			0xdd0
+#define PKT5_EMP_CVTEM_CONTENTS5			0xdd4
+#define PKT5_EMP_CVTEM_CONTENTS6			0xdd8
+#define PKT5_EMP_CVTEM_CONTENTS7			0xddc
+/* Audio Packetizer Registers */
+#define AUDPKT_CONTROL0					0xe20
+#define AUDPKT_PBIT_FORCE_EN_MASK			BIT(12)
+#define AUDPKT_PBIT_FORCE_EN				BIT(12)
+#define AUDPKT_CHSTATUS_OVR_EN_MASK			BIT(0)
+#define AUDPKT_CHSTATUS_OVR_EN				BIT(0)
+#define AUDPKT_CONTROL1					0xe24
+#define AUDPKT_ACR_CONTROL0				0xe40
+#define AUDPKT_ACR_N_VALUE				0xfffff
+#define AUDPKT_ACR_CONTROL1				0xe44
+#define AUDPKT_ACR_CTS_OVR_VAL_MSK			GENMASK(23, 4)
+#define AUDPKT_ACR_CTS_OVR_VAL(x)			((x) << 4)
+#define AUDPKT_ACR_CTS_OVR_EN_MSK			BIT(1)
+#define AUDPKT_ACR_CTS_OVR_EN				BIT(1)
+#define AUDPKT_ACR_STATUS0				0xe4c
+#define AUDPKT_CHSTATUS_OVR0				0xe60
+#define AUDPKT_CHSTATUS_OVR1				0xe64
+/* IEC60958 Byte 3: Sampleing frenuency Bits 24 to 27 */
+#define AUDPKT_CHSTATUS_SR_MASK				GENMASK(3, 0)
+#define AUDPKT_CHSTATUS_SR_22050			0x4
+#define AUDPKT_CHSTATUS_SR_24000			0x6
+#define AUDPKT_CHSTATUS_SR_32000			0x3
+#define AUDPKT_CHSTATUS_SR_44100			0x0
+#define AUDPKT_CHSTATUS_SR_48000			0x2
+#define AUDPKT_CHSTATUS_SR_88200			0x8
+#define AUDPKT_CHSTATUS_SR_96000			0xa
+#define AUDPKT_CHSTATUS_SR_176400			0xc
+#define AUDPKT_CHSTATUS_SR_192000			0xe
+#define AUDPKT_CHSTATUS_SR_768000			0x9
+#define AUDPKT_CHSTATUS_SR_NOT_INDICATED		0x1
+/* IEC60958 Byte 4: Original Sampleing frenuency Bits 36 to 39 */
+#define AUDPKT_CHSTATUS_0SR_MASK			GENMASK(15, 12)
+#define AUDPKT_CHSTATUS_OSR_8000			0x6
+#define AUDPKT_CHSTATUS_OSR_11025			0xa
+#define AUDPKT_CHSTATUS_OSR_12000			0x2
+#define AUDPKT_CHSTATUS_OSR_16000			0x8
+#define AUDPKT_CHSTATUS_OSR_22050			0xb
+#define AUDPKT_CHSTATUS_OSR_24000			0x9
+#define AUDPKT_CHSTATUS_OSR_32000			0xc
+#define AUDPKT_CHSTATUS_OSR_44100			0xf
+#define AUDPKT_CHSTATUS_OSR_48000			0xd
+#define AUDPKT_CHSTATUS_OSR_88200			0x7
+#define AUDPKT_CHSTATUS_OSR_96000			0x5
+#define AUDPKT_CHSTATUS_OSR_176400			0x3
+#define AUDPKT_CHSTATUS_OSR_192000			0x1
+#define AUDPKT_CHSTATUS_OSR_NOT_INDICATED		0x0
+#define AUDPKT_CHSTATUS_OVR2				0xe68
+#define AUDPKT_CHSTATUS_OVR3				0xe6c
+#define AUDPKT_CHSTATUS_OVR4				0xe70
+#define AUDPKT_CHSTATUS_OVR5				0xe74
+#define AUDPKT_CHSTATUS_OVR6				0xe78
+#define AUDPKT_CHSTATUS_OVR7				0xe7c
+#define AUDPKT_CHSTATUS_OVR8				0xe80
+#define AUDPKT_CHSTATUS_OVR9				0xe84
+#define AUDPKT_CHSTATUS_OVR10				0xe88
+#define AUDPKT_CHSTATUS_OVR11				0xe8c
+#define AUDPKT_CHSTATUS_OVR12				0xe90
+#define AUDPKT_CHSTATUS_OVR13				0xe94
+#define AUDPKT_CHSTATUS_OVR14				0xe98
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC0			0xea0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC1			0xea4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC2			0xea8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC3			0xeac
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC4			0xeb0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC5			0xeb4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC6			0xeb8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC7			0xebc
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC8			0xec0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC9			0xec4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC10		0xec8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC11		0xecc
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC12		0xed0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC13		0xed4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC14		0xed8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC15		0xedc
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC16		0xee0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC17		0xee4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC18		0xee8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC19		0xeec
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC20		0xef0
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC21		0xef4
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC22		0xef8
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC23		0xefc
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC24		0xf00
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC25		0xf04
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC26		0xf08
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC27		0xf0c
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC28		0xf10
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC29		0xf14
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC30		0xf18
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC31		0xf1c
+#define AUDPKT_USRDATA_OVR_MSG_GENERIC32		0xf20
+#define AUDPKT_VBIT_OVR0				0xf24
+/* CEC Registers */
+#define CEC_TX_CONTROL					0x1000
+#define CEC_STATUS					0x1004
+#define CEC_CONFIG					0x1008
+#define CEC_ADDR					0x100c
+#define CEC_TX_COUNT					0x1020
+#define CEC_TX_DATA3_0					0x1024
+#define CEC_TX_DATA7_4					0x1028
+#define CEC_TX_DATA11_8					0x102c
+#define CEC_TX_DATA15_12				0x1030
+#define CEC_RX_COUNT_STATUS				0x1040
+#define CEC_RX_DATA3_0					0x1044
+#define CEC_RX_DATA7_4					0x1048
+#define CEC_RX_DATA11_8					0x104c
+#define CEC_RX_DATA15_12				0x1050
+#define CEC_LOCK_CONTROL				0x1054
+#define CEC_RXQUAL_BITTIME_CONFIG			0x1060
+#define CEC_RX_BITTIME_CONFIG				0x1064
+#define CEC_TX_BITTIME_CONFIG				0x1068
+/* eARC RX CMDC Registers */
+#define EARCRX_CMDC_CONFIG0				0x1800
+#define EARCRX_XACTREAD_STOP_CFG			BIT(26)
+#define EARCRX_XACTREAD_RETRY_CFG			BIT(25)
+#define EARCRX_CMDC_DSCVR_EARCVALID0_TO_DISC1		BIT(24)
+#define EARCRX_CMDC_XACT_RESTART_EN			BIT(18)
+#define EARCRX_CMDC_CONFIG1				0x1804
+#define EARCRX_CMDC_CONTROL				0x1808
+#define EARCRX_CMDC_HEARTBEAT_LOSS_EN			BIT(4)
+#define EARCRX_CMDC_DISCOVERY_EN			BIT(3)
+#define EARCRX_CONNECTOR_HPD				BIT(1)
+#define EARCRX_CMDC_WHITELIST0_CONFIG			0x180c
+#define EARCRX_CMDC_WHITELIST1_CONFIG			0x1810
+#define EARCRX_CMDC_WHITELIST2_CONFIG			0x1814
+#define EARCRX_CMDC_WHITELIST3_CONFIG			0x1818
+#define EARCRX_CMDC_STATUS				0x181c
+#define EARCRX_CMDC_XACT_INFO				0x1820
+#define EARCRX_CMDC_XACT_ACTION				0x1824
+#define EARCRX_CMDC_HEARTBEAT_RXSTAT_SE			0x1828
+#define EARCRX_CMDC_HEARTBEAT_STATUS			0x182c
+#define EARCRX_CMDC_XACT_WR0				0x1840
+#define EARCRX_CMDC_XACT_WR1				0x1844
+#define EARCRX_CMDC_XACT_WR2				0x1848
+#define EARCRX_CMDC_XACT_WR3				0x184c
+#define EARCRX_CMDC_XACT_WR4				0x1850
+#define EARCRX_CMDC_XACT_WR5				0x1854
+#define EARCRX_CMDC_XACT_WR6				0x1858
+#define EARCRX_CMDC_XACT_WR7				0x185c
+#define EARCRX_CMDC_XACT_WR8				0x1860
+#define EARCRX_CMDC_XACT_WR9				0x1864
+#define EARCRX_CMDC_XACT_WR10				0x1868
+#define EARCRX_CMDC_XACT_WR11				0x186c
+#define EARCRX_CMDC_XACT_WR12				0x1870
+#define EARCRX_CMDC_XACT_WR13				0x1874
+#define EARCRX_CMDC_XACT_WR14				0x1878
+#define EARCRX_CMDC_XACT_WR15				0x187c
+#define EARCRX_CMDC_XACT_WR16				0x1880
+#define EARCRX_CMDC_XACT_WR17				0x1884
+#define EARCRX_CMDC_XACT_WR18				0x1888
+#define EARCRX_CMDC_XACT_WR19				0x188c
+#define EARCRX_CMDC_XACT_WR20				0x1890
+#define EARCRX_CMDC_XACT_WR21				0x1894
+#define EARCRX_CMDC_XACT_WR22				0x1898
+#define EARCRX_CMDC_XACT_WR23				0x189c
+#define EARCRX_CMDC_XACT_WR24				0x18a0
+#define EARCRX_CMDC_XACT_WR25				0x18a4
+#define EARCRX_CMDC_XACT_WR26				0x18a8
+#define EARCRX_CMDC_XACT_WR27				0x18ac
+#define EARCRX_CMDC_XACT_WR28				0x18b0
+#define EARCRX_CMDC_XACT_WR29				0x18b4
+#define EARCRX_CMDC_XACT_WR30				0x18b8
+#define EARCRX_CMDC_XACT_WR31				0x18bc
+#define EARCRX_CMDC_XACT_WR32				0x18c0
+#define EARCRX_CMDC_XACT_WR33				0x18c4
+#define EARCRX_CMDC_XACT_WR34				0x18c8
+#define EARCRX_CMDC_XACT_WR35				0x18cc
+#define EARCRX_CMDC_XACT_WR36				0x18d0
+#define EARCRX_CMDC_XACT_WR37				0x18d4
+#define EARCRX_CMDC_XACT_WR38				0x18d8
+#define EARCRX_CMDC_XACT_WR39				0x18dc
+#define EARCRX_CMDC_XACT_WR40				0x18e0
+#define EARCRX_CMDC_XACT_WR41				0x18e4
+#define EARCRX_CMDC_XACT_WR42				0x18e8
+#define EARCRX_CMDC_XACT_WR43				0x18ec
+#define EARCRX_CMDC_XACT_WR44				0x18f0
+#define EARCRX_CMDC_XACT_WR45				0x18f4
+#define EARCRX_CMDC_XACT_WR46				0x18f8
+#define EARCRX_CMDC_XACT_WR47				0x18fc
+#define EARCRX_CMDC_XACT_WR48				0x1900
+#define EARCRX_CMDC_XACT_WR49				0x1904
+#define EARCRX_CMDC_XACT_WR50				0x1908
+#define EARCRX_CMDC_XACT_WR51				0x190c
+#define EARCRX_CMDC_XACT_WR52				0x1910
+#define EARCRX_CMDC_XACT_WR53				0x1914
+#define EARCRX_CMDC_XACT_WR54				0x1918
+#define EARCRX_CMDC_XACT_WR55				0x191c
+#define EARCRX_CMDC_XACT_WR56				0x1920
+#define EARCRX_CMDC_XACT_WR57				0x1924
+#define EARCRX_CMDC_XACT_WR58				0x1928
+#define EARCRX_CMDC_XACT_WR59				0x192c
+#define EARCRX_CMDC_XACT_WR60				0x1930
+#define EARCRX_CMDC_XACT_WR61				0x1934
+#define EARCRX_CMDC_XACT_WR62				0x1938
+#define EARCRX_CMDC_XACT_WR63				0x193c
+#define EARCRX_CMDC_XACT_WR64				0x1940
+#define EARCRX_CMDC_XACT_RD0				0x1960
+#define EARCRX_CMDC_XACT_RD1				0x1964
+#define EARCRX_CMDC_XACT_RD2				0x1968
+#define EARCRX_CMDC_XACT_RD3				0x196c
+#define EARCRX_CMDC_XACT_RD4				0x1970
+#define EARCRX_CMDC_XACT_RD5				0x1974
+#define EARCRX_CMDC_XACT_RD6				0x1978
+#define EARCRX_CMDC_XACT_RD7				0x197c
+#define EARCRX_CMDC_XACT_RD8				0x1980
+#define EARCRX_CMDC_XACT_RD9				0x1984
+#define EARCRX_CMDC_XACT_RD10				0x1988
+#define EARCRX_CMDC_XACT_RD11				0x198c
+#define EARCRX_CMDC_XACT_RD12				0x1990
+#define EARCRX_CMDC_XACT_RD13				0x1994
+#define EARCRX_CMDC_XACT_RD14				0x1998
+#define EARCRX_CMDC_XACT_RD15				0x199c
+#define EARCRX_CMDC_XACT_RD16				0x19a0
+#define EARCRX_CMDC_XACT_RD17				0x19a4
+#define EARCRX_CMDC_XACT_RD18				0x19a8
+#define EARCRX_CMDC_XACT_RD19				0x19ac
+#define EARCRX_CMDC_XACT_RD20				0x19b0
+#define EARCRX_CMDC_XACT_RD21				0x19b4
+#define EARCRX_CMDC_XACT_RD22				0x19b8
+#define EARCRX_CMDC_XACT_RD23				0x19bc
+#define EARCRX_CMDC_XACT_RD24				0x19c0
+#define EARCRX_CMDC_XACT_RD25				0x19c4
+#define EARCRX_CMDC_XACT_RD26				0x19c8
+#define EARCRX_CMDC_XACT_RD27				0x19cc
+#define EARCRX_CMDC_XACT_RD28				0x19d0
+#define EARCRX_CMDC_XACT_RD29				0x19d4
+#define EARCRX_CMDC_XACT_RD30				0x19d8
+#define EARCRX_CMDC_XACT_RD31				0x19dc
+#define EARCRX_CMDC_XACT_RD32				0x19e0
+#define EARCRX_CMDC_XACT_RD33				0x19e4
+#define EARCRX_CMDC_XACT_RD34				0x19e8
+#define EARCRX_CMDC_XACT_RD35				0x19ec
+#define EARCRX_CMDC_XACT_RD36				0x19f0
+#define EARCRX_CMDC_XACT_RD37				0x19f4
+#define EARCRX_CMDC_XACT_RD38				0x19f8
+#define EARCRX_CMDC_XACT_RD39				0x19fc
+#define EARCRX_CMDC_XACT_RD40				0x1a00
+#define EARCRX_CMDC_XACT_RD41				0x1a04
+#define EARCRX_CMDC_XACT_RD42				0x1a08
+#define EARCRX_CMDC_XACT_RD43				0x1a0c
+#define EARCRX_CMDC_XACT_RD44				0x1a10
+#define EARCRX_CMDC_XACT_RD45				0x1a14
+#define EARCRX_CMDC_XACT_RD46				0x1a18
+#define EARCRX_CMDC_XACT_RD47				0x1a1c
+#define EARCRX_CMDC_XACT_RD48				0x1a20
+#define EARCRX_CMDC_XACT_RD49				0x1a24
+#define EARCRX_CMDC_XACT_RD50				0x1a28
+#define EARCRX_CMDC_XACT_RD51				0x1a2c
+#define EARCRX_CMDC_XACT_RD52				0x1a30
+#define EARCRX_CMDC_XACT_RD53				0x1a34
+#define EARCRX_CMDC_XACT_RD54				0x1a38
+#define EARCRX_CMDC_XACT_RD55				0x1a3c
+#define EARCRX_CMDC_XACT_RD56				0x1a40
+#define EARCRX_CMDC_XACT_RD57				0x1a44
+#define EARCRX_CMDC_XACT_RD58				0x1a48
+#define EARCRX_CMDC_XACT_RD59				0x1a4c
+#define EARCRX_CMDC_XACT_RD60				0x1a50
+#define EARCRX_CMDC_XACT_RD61				0x1a54
+#define EARCRX_CMDC_XACT_RD62				0x1a58
+#define EARCRX_CMDC_XACT_RD63				0x1a5c
+#define EARCRX_CMDC_XACT_RD64				0x1a60
+#define EARCRX_CMDC_SYNC_CONFIG				0x1b00
+/* eARC RX DMAC Registers */
+#define EARCRX_DMAC_PHY_CONTROL				0x1c00
+#define EARCRX_DMAC_CONFIG				0x1c08
+#define EARCRX_DMAC_CONTROL0				0x1c0c
+#define EARCRX_DMAC_AUDIO_EN				BIT(1)
+#define EARCRX_DMAC_EN					BIT(0)
+#define EARCRX_DMAC_CONTROL1				0x1c10
+#define EARCRX_DMAC_STATUS				0x1c14
+#define EARCRX_DMAC_CHSTATUS0				0x1c18
+#define EARCRX_DMAC_CHSTATUS1				0x1c1c
+#define EARCRX_DMAC_CHSTATUS2				0x1c20
+#define EARCRX_DMAC_CHSTATUS3				0x1c24
+#define EARCRX_DMAC_CHSTATUS4				0x1c28
+#define EARCRX_DMAC_CHSTATUS5				0x1c2c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC0		0x1c30
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC1		0x1c34
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC2		0x1c38
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC3		0x1c3c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC4		0x1c40
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC5		0x1c44
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC6		0x1c48
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC7		0x1c4c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC8		0x1c50
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC9		0x1c54
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC10		0x1c58
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_AC11		0x1c5c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT0		0x1c60
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT1		0x1c64
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT2		0x1c68
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT3		0x1c6c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT4		0x1c70
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT5		0x1c74
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT6		0x1c78
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT7		0x1c7c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT8		0x1c80
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT9		0x1c84
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT10	0x1c88
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC1_PKT11	0x1c8c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT0		0x1c90
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT1		0x1c94
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT2		0x1c98
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT3		0x1c9c
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT4		0x1ca0
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT5		0x1ca4
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT6		0x1ca8
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT7		0x1cac
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT8		0x1cb0
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT9		0x1cb4
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT10	0x1cb8
+#define EARCRX_DMAC_USRDATA_MSG_HDMI_ISRC2_PKT11	0x1cbc
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC0		0x1cc0
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC1		0x1cc4
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC2		0x1cc8
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC3		0x1ccc
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC4		0x1cd0
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC5		0x1cd4
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC6		0x1cd8
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC7		0x1cdc
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC8		0x1ce0
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC9		0x1ce4
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC10		0x1ce8
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC11		0x1cec
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC12		0x1cf0
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC13		0x1cf4
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC14		0x1cf8
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC15		0x1cfc
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC16		0x1d00
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC17		0x1d04
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC18		0x1d08
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC19		0x1d0c
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC20		0x1d10
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC21		0x1d14
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC22		0x1d18
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC23		0x1d1c
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC24		0x1d20
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC25		0x1d24
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC26		0x1d28
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC27		0x1d2c
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC28		0x1d30
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC29		0x1d34
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC30		0x1d38
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC31		0x1d3c
+#define EARCRX_DMAC_USRDATA_MSG_GENERIC32		0x1d40
+#define EARCRX_DMAC_CHSTATUS_STREAMER0			0x1d44
+#define EARCRX_DMAC_CHSTATUS_STREAMER1			0x1d48
+#define EARCRX_DMAC_CHSTATUS_STREAMER2			0x1d4c
+#define EARCRX_DMAC_CHSTATUS_STREAMER3			0x1d50
+#define EARCRX_DMAC_CHSTATUS_STREAMER4			0x1d54
+#define EARCRX_DMAC_CHSTATUS_STREAMER5			0x1d58
+#define EARCRX_DMAC_CHSTATUS_STREAMER6			0x1d5c
+#define EARCRX_DMAC_CHSTATUS_STREAMER7			0x1d60
+#define EARCRX_DMAC_CHSTATUS_STREAMER8			0x1d64
+#define EARCRX_DMAC_CHSTATUS_STREAMER9			0x1d68
+#define EARCRX_DMAC_CHSTATUS_STREAMER10			0x1d6c
+#define EARCRX_DMAC_CHSTATUS_STREAMER11			0x1d70
+#define EARCRX_DMAC_CHSTATUS_STREAMER12			0x1d74
+#define EARCRX_DMAC_CHSTATUS_STREAMER13			0x1d78
+#define EARCRX_DMAC_CHSTATUS_STREAMER14			0x1d7c
+#define EARCRX_DMAC_USRDATA_STREAMER0			0x1d80
+/* Main Unit Interrupt Registers */
+#define MAIN_INTVEC_INDEX				0x3000
+#define MAINUNIT_0_INT_STATUS				0x3010
+#define MAINUNIT_0_INT_MASK_N				0x3014
+#define MAINUNIT_0_INT_CLEAR				0x3018
+#define MAINUNIT_0_INT_FORCE				0x301c
+#define MAINUNIT_1_INT_STATUS				0x3020
+#define FLT_EXIT_TO_LTSL_IRQ				BIT(22)
+#define FLT_EXIT_TO_LTS4_IRQ				BIT(21)
+#define FLT_EXIT_TO_LTSP_IRQ				BIT(20)
+#define SCDC_NACK_RCVD_IRQ				BIT(12)
+#define SCDC_RR_REPLY_STOP_IRQ				BIT(11)
+#define SCDC_UPD_FLAGS_CLR_IRQ				BIT(10)
+#define SCDC_UPD_FLAGS_CHG_IRQ				BIT(9)
+#define SCDC_UPD_FLAGS_RD_IRQ				BIT(8)
+#define I2CM_NACK_RCVD_IRQ				BIT(2)
+#define I2CM_READ_REQUEST_IRQ				BIT(1)
+#define I2CM_OP_DONE_IRQ				BIT(0)
+#define MAINUNIT_1_INT_MASK_N				0x3024
+#define I2CM_NACK_RCVD_MASK_N				BIT(2)
+#define I2CM_READ_REQUEST_MASK_N			BIT(1)
+#define I2CM_OP_DONE_MASK_N				BIT(0)
+#define MAINUNIT_1_INT_CLEAR				0x3028
+#define I2CM_NACK_RCVD_CLEAR				BIT(2)
+#define I2CM_READ_REQUEST_CLEAR				BIT(1)
+#define I2CM_OP_DONE_CLEAR				BIT(0)
+#define MAINUNIT_1_INT_FORCE				0x302c
+/* AVPUNIT Interrupt Registers */
+#define AVP_INTVEC_INDEX				0x3800
+#define AVP_0_INT_STATUS				0x3810
+#define AVP_0_INT_MASK_N				0x3814
+#define AVP_0_INT_CLEAR					0x3818
+#define AVP_0_INT_FORCE					0x381c
+#define AVP_1_INT_STATUS				0x3820
+#define AVP_1_INT_MASK_N				0x3824
+#define HDCP14_AUTH_CHG_MASK_N				BIT(6)
+#define AVP_1_INT_CLEAR					0x3828
+#define AVP_1_INT_FORCE					0x382c
+#define AVP_2_INT_STATUS				0x3830
+#define AVP_2_INT_MASK_N				0x3834
+#define AVP_2_INT_CLEAR					0x3838
+#define AVP_2_INT_FORCE					0x383c
+#define AVP_3_INT_STATUS				0x3840
+#define AVP_3_INT_MASK_N				0x3844
+#define AVP_3_INT_CLEAR					0x3848
+#define AVP_3_INT_FORCE					0x384c
+#define AVP_4_INT_STATUS				0x3850
+#define AVP_4_INT_MASK_N				0x3854
+#define AVP_4_INT_CLEAR					0x3858
+#define AVP_4_INT_FORCE					0x385c
+#define AVP_5_INT_STATUS				0x3860
+#define AVP_5_INT_MASK_N				0x3864
+#define AVP_5_INT_CLEAR					0x3868
+#define AVP_5_INT_FORCE					0x386c
+#define AVP_6_INT_STATUS				0x3870
+#define AVP_6_INT_MASK_N				0x3874
+#define AVP_6_INT_CLEAR					0x3878
+#define AVP_6_INT_FORCE					0x387c
+/* CEC Interrupt Registers */
+#define CEC_INT_STATUS					0x4000
+#define CEC_INT_MASK_N					0x4004
+#define CEC_INT_CLEAR					0x4008
+#define CEC_INT_FORCE					0x400c
+/* eARC RX Interrupt Registers  */
+#define EARCRX_INTVEC_INDEX				0x4800
+#define EARCRX_0_INT_STATUS				0x4810
+#define EARCRX_CMDC_DISCOVERY_TIMEOUT_IRQ		BIT(9)
+#define EARCRX_CMDC_DISCOVERY_DONE_IRQ			BIT(8)
+#define EARCRX_0_INT_MASK_N				0x4814
+#define EARCRX_0_INT_CLEAR				0x4818
+#define EARCRX_0_INT_FORCE				0x481c
+#define EARCRX_1_INT_STATUS				0x4820
+#define EARCRX_1_INT_MASK_N				0x4824
+#define EARCRX_1_INT_CLEAR				0x4828
+#define EARCRX_1_INT_FORCE				0x482c
+
+#endif /* __DW_HDMI_QP_H__ */
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index cceb5aab6c83..c16262d10cc9 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -163,6 +163,8 @@ struct dw_hdmi {
 	void __iomem *regs;
 	bool sink_is_hdmi;
 	bool sink_has_audio;
+	bool support_hdmi;
+	int force_output;

 	struct pinctrl *pinctrl;
 	struct pinctrl_state *default_state;
@@ -255,6 +257,25 @@ static void hdmi_mask_writeb(struct dw_hdmi *hdmi, u8 data, unsigned int reg,
 	hdmi_modb(hdmi, data << shift, mask, reg);
 }

+static bool dw_hdmi_check_output_type_changed(struct dw_hdmi *hdmi)
+{
+	bool sink_hdmi;
+
+	sink_hdmi = hdmi->sink_is_hdmi;
+
+	if (hdmi->force_output == 1)
+		hdmi->sink_is_hdmi = true;
+	else if (hdmi->force_output == 2)
+		hdmi->sink_is_hdmi = false;
+	else
+		hdmi->sink_is_hdmi = hdmi->support_hdmi;
+
+	if (sink_hdmi != hdmi->sink_is_hdmi)
+		return true;
+
+	return false;
+}
+
 static void dw_hdmi_i2c_init(struct dw_hdmi *hdmi)
 {
 	hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL,
@@ -2539,6 +2560,45 @@ static int dw_hdmi_connector_atomic_check(struct drm_connector *connector,
 	return 0;
 }

+void dw_hdmi_set_quant_range(struct dw_hdmi *hdmi)
+{
+	if (!hdmi->bridge_is_on)
+		return;
+
+	hdmi_writeb(hdmi, HDMI_FC_GCP_SET_AVMUTE, HDMI_FC_GCP);
+	dw_hdmi_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode);
+	hdmi_writeb(hdmi, HDMI_FC_GCP_CLEAR_AVMUTE, HDMI_FC_GCP);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_quant_range);
+
+void dw_hdmi_set_output_type(struct dw_hdmi *hdmi, u64 val)
+{
+	hdmi->force_output = val;
+
+	if (!dw_hdmi_check_output_type_changed(hdmi))
+		return;
+
+	if (!hdmi->bridge_is_on)
+		return;
+
+	hdmi_writeb(hdmi, HDMI_FC_GCP_SET_AVMUTE, HDMI_FC_GCP);
+	dw_hdmi_setup(hdmi, hdmi->curr_conn, &hdmi->previous_mode);
+	hdmi_writeb(hdmi, HDMI_FC_GCP_CLEAR_AVMUTE, HDMI_FC_GCP);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_output_type);
+
+bool dw_hdmi_get_output_whether_hdmi(struct dw_hdmi *hdmi)
+{
+	return hdmi->sink_is_hdmi;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_get_output_whether_hdmi);
+
+int dw_hdmi_get_output_type_cap(struct dw_hdmi *hdmi)
+{
+	return hdmi->support_hdmi;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_get_output_type_cap);
+
 static void dw_hdmi_connector_force(struct drm_connector *connector)
 {
 	struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi,
@@ -3691,6 +3751,35 @@ void dw_hdmi_unbind(struct dw_hdmi *hdmi)
 }
 EXPORT_SYMBOL_GPL(dw_hdmi_unbind);

+void dw_hdmi_suspend(struct dw_hdmi *hdmi)
+{
+	if (!hdmi)
+		return;
+
+	mutex_lock(&hdmi->mutex);
+
+	/*
+	 * When system shutdown, hdmi should be disabled.
+	 * When system suspend, dw_hdmi_bridge_disable will disable hdmi first.
+	 * To prevent duplicate operation, we should determine whether hdmi
+	 * has been disabled.
+	 */
+	if (!hdmi->disabled) {
+		hdmi->disabled = true;
+		dw_hdmi_update_power(hdmi);
+		dw_hdmi_update_phy_mask(hdmi);
+	}
+	mutex_unlock(&hdmi->mutex);
+
+	//[CC: needed?]
+	// if (hdmi->irq)
+	// 	disable_irq(hdmi->irq);
+	// cancel_delayed_work(&hdmi->work);
+	// flush_workqueue(hdmi->workqueue);
+	pinctrl_pm_select_sleep_state(hdmi->dev);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_suspend);
+
 void dw_hdmi_resume(struct dw_hdmi *hdmi)
 {
 	dw_hdmi_init_hw(hdmi);
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
index af43a0414b78..8ebdec7254f2 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.h
@@ -851,6 +851,10 @@ enum {
 	HDMI_FC_AVICONF3_QUANT_RANGE_LIMITED = 0x00,
 	HDMI_FC_AVICONF3_QUANT_RANGE_FULL = 0x04,

+/* HDMI_FC_GCP */
+	HDMI_FC_GCP_SET_AVMUTE = 0x2,
+	HDMI_FC_GCP_CLEAR_AVMUTE = 0x1,
+
 /* FC_DBGFORCE field values */
 	HDMI_FC_DBGFORCE_FORCEAUDIO = 0x10,
 	HDMI_FC_DBGFORCE_FORCEVIDEO = 0x1,
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index fe33092abbe7..a5d1a70d9b98 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -4,21 +4,32 @@
  */

 #include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/media-bus-format.h>
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
+#include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>

+#include <drm/drm_of.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/display/drm_dsc.h>
+#include <drm/drm_edid.h>
 #include <drm/bridge/dw_hdmi.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_of.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>

+#include <uapi/linux/videodev2.h>
+
 #include "rockchip_drm_drv.h"

+#define HIWORD_UPDATE(val, mask)	(val | (mask) << 16)
+
 #define RK3228_GRF_SOC_CON2		0x0408
 #define RK3228_HDMI_SDAIN_MSK		BIT(14)
 #define RK3228_HDMI_SCLIN_MSK		BIT(13)
@@ -29,8 +40,11 @@

 #define RK3288_GRF_SOC_CON6		0x025C
 #define RK3288_HDMI_LCDC_SEL		BIT(4)
-#define RK3328_GRF_SOC_CON2		0x0408
+#define RK3288_GRF_SOC_CON16		0x03a8
+#define RK3288_HDMI_LCDC0_YUV420	BIT(2)
+#define RK3288_HDMI_LCDC1_YUV420	BIT(3)

+#define RK3328_GRF_SOC_CON2		0x0408
 #define RK3328_HDMI_SDAIN_MSK		BIT(11)
 #define RK3328_HDMI_SCLIN_MSK		BIT(10)
 #define RK3328_HDMI_HPD_IOE		BIT(2)
@@ -54,32 +68,177 @@
 #define RK3568_HDMI_SDAIN_MSK		BIT(15)
 #define RK3568_HDMI_SCLIN_MSK		BIT(14)

-#define HIWORD_UPDATE(val, mask)	(val | (mask) << 16)
+#define RK3588_GRF_SOC_CON2		0x0308
+#define RK3588_HDMI1_HPD_INT_MSK	BIT(15)
+#define RK3588_HDMI1_HPD_INT_CLR	BIT(14)
+#define RK3588_HDMI0_HPD_INT_MSK	BIT(13)
+#define RK3588_HDMI0_HPD_INT_CLR	BIT(12)
+#define RK3588_GRF_SOC_CON7		0x031c
+#define RK3588_SET_HPD_PATH_MASK	(0x3 << 12)
+#define RK3588_GRF_SOC_STATUS1		0x0384
+#define RK3588_HDMI0_LOW_MORETHAN100MS	BIT(20)
+#define RK3588_HDMI0_HPD_PORT_LEVEL	BIT(19)
+#define RK3588_HDMI0_IHPD_PORT		BIT(18)
+#define RK3588_HDMI0_OHPD_INT		BIT(17)
+#define RK3588_HDMI0_LEVEL_INT		BIT(16)
+#define RK3588_HDMI0_INTR_CHANGE_CNT	(0x7 << 13)
+#define RK3588_HDMI1_LOW_MORETHAN100MS	BIT(28)
+#define RK3588_HDMI1_HPD_PORT_LEVEL	BIT(27)
+#define RK3588_HDMI1_IHPD_PORT		BIT(26)
+#define RK3588_HDMI1_OHPD_INT		BIT(25)
+#define RK3588_HDMI1_LEVEL_INT		BIT(24)
+#define RK3588_HDMI1_INTR_CHANGE_CNT	(0x7 << 21)
+
+#define RK3588_GRF_VO1_CON3		0x000c
+#define RK3588_COLOR_FORMAT_MASK	0xf
+#define RK3588_YUV444			0x2
+#define RK3588_YUV420			0x3
+#define RK3588_COMPRESSED_DATA		0xb
+#define RK3588_COLOR_DEPTH_MASK		(0xf << 4)
+#define RK3588_8BPC			(0x5 << 4)
+#define RK3588_10BPC			(0x6 << 4)
+#define RK3588_CECIN_MASK		BIT(8)
+#define RK3588_SCLIN_MASK		BIT(9)
+#define RK3588_SDAIN_MASK		BIT(10)
+#define RK3588_MODE_MASK		BIT(11)
+#define RK3588_COMPRESS_MODE_MASK	BIT(12)
+#define RK3588_I2S_SEL_MASK		BIT(13)
+#define RK3588_SPDIF_SEL_MASK		BIT(14)
+#define RK3588_GRF_VO1_CON4		0x0010
+#define RK3588_HDMI21_MASK		BIT(0)
+#define RK3588_GRF_VO1_CON9		0x0024
+#define RK3588_HDMI0_GRANT_SEL		BIT(10)
+#define RK3588_HDMI0_GRANT_SW		BIT(11)
+#define RK3588_HDMI1_GRANT_SEL		BIT(12)
+#define RK3588_HDMI1_GRANT_SW		BIT(13)
+#define RK3588_GRF_VO1_CON6		0x0018
+#define RK3588_GRF_VO1_CON7		0x001c
+
+#define COLOR_DEPTH_10BIT		BIT(31)
+#define HDMI_FRL_MODE			BIT(30)
+#define HDMI_EARC_MODE			BIT(29)
+
+#define HDMI20_MAX_RATE			600000
+#define HDMI_8K60_RATE			2376000

 /**
  * struct rockchip_hdmi_chip_data - splite the grf setting of kind of chips
  * @lcdsel_grf_reg: grf register offset of lcdc select
+ * @ddc_en_reg: grf register offset of hdmi ddc enable
  * @lcdsel_big: reg value of selecting vop big for HDMI
  * @lcdsel_lit: reg value of selecting vop little for HDMI
+ * @split_mode: flag indicating split mode capability
  */
 struct rockchip_hdmi_chip_data {
 	int	lcdsel_grf_reg;
+	int	ddc_en_reg;
 	u32	lcdsel_big;
 	u32	lcdsel_lit;
+	bool	split_mode;
+};
+
+enum hdmi_frl_rate_per_lane {
+	FRL_12G_PER_LANE = 12,
+	FRL_10G_PER_LANE = 10,
+	FRL_8G_PER_LANE = 8,
+	FRL_6G_PER_LANE = 6,
+	FRL_3G_PER_LANE = 3,
+};
+
+enum rk_if_color_depth {
+	RK_IF_DEPTH_8,
+	RK_IF_DEPTH_10,
+	RK_IF_DEPTH_12,
+	RK_IF_DEPTH_16,
+	RK_IF_DEPTH_420_10,
+	RK_IF_DEPTH_420_12,
+	RK_IF_DEPTH_420_16,
+	RK_IF_DEPTH_6,
+	RK_IF_DEPTH_MAX,
+};
+
+enum rk_if_color_format {
+	RK_IF_FORMAT_RGB, /* default RGB */
+	RK_IF_FORMAT_YCBCR444, /* YCBCR 444 */
+	RK_IF_FORMAT_YCBCR422, /* YCBCR 422 */
+	RK_IF_FORMAT_YCBCR420, /* YCBCR 420 */
+	RK_IF_FORMAT_YCBCR_HQ, /* Highest subsampled YUV */
+	RK_IF_FORMAT_YCBCR_LQ, /* Lowest subsampled YUV */
+	RK_IF_FORMAT_MAX,
 };

 struct rockchip_hdmi {
 	struct device *dev;
 	struct regmap *regmap;
+	struct regmap *vo1_regmap;
 	struct rockchip_encoder encoder;
+	struct drm_device *drm_dev;
 	const struct rockchip_hdmi_chip_data *chip_data;
-	const struct dw_hdmi_plat_data *plat_data;
+	struct dw_hdmi_plat_data *plat_data;
+	struct clk *aud_clk;
 	struct clk *ref_clk;
 	struct clk *grf_clk;
+	struct clk *hclk_vio;
+	struct clk *hclk_vo1;
+	struct clk *hclk_vop;
+	struct clk *hpd_clk;
+	struct clk *pclk;
+	struct clk *earc_clk;
+	struct clk *hdmitx_ref;
 	struct dw_hdmi *hdmi;
+	struct dw_hdmi_qp *hdmi_qp;
+
 	struct regulator *avdd_0v9;
 	struct regulator *avdd_1v8;
 	struct phy *phy;
+
+	u32 max_tmdsclk;
+	bool unsupported_yuv_input;
+	bool unsupported_deep_color;
+	bool skip_check_420_mode;
+	u8 force_output;
+	u8 id;
+	bool hpd_stat;
+	bool is_hdmi_qp;
+	bool user_split_mode;
+
+	unsigned long bus_format;
+	unsigned long output_bus_format;
+	unsigned long enc_out_encoding;
+	int color_changed;
+	int hpd_irq;
+	int vp_id;
+
+	struct drm_property *color_depth_property;
+	struct drm_property *hdmi_output_property;
+	struct drm_property *colordepth_capacity;
+	struct drm_property *outputmode_capacity;
+	struct drm_property *quant_range;
+	struct drm_property *hdr_panel_metadata_property;
+	struct drm_property *next_hdr_sink_data_property;
+	struct drm_property *output_hdmi_dvi;
+	struct drm_property *output_type_capacity;
+	struct drm_property *user_split_mode_prop;
+
+	struct drm_property_blob *hdr_panel_blob_ptr;
+	struct drm_property_blob *next_hdr_data_ptr;
+
+	unsigned int colordepth;
+	unsigned int colorimetry;
+	unsigned int hdmi_quant_range;
+	unsigned int phy_bus_width;
+	enum rk_if_color_format hdmi_output;
+	// struct rockchip_drm_sub_dev sub_dev;
+
+	u8 max_frl_rate_per_lane;
+	u8 max_lanes;
+	// struct rockchip_drm_dsc_cap dsc_cap;
+	// struct next_hdr_sink_data next_hdr_data;
+	struct dw_hdmi_link_config link_cfg;
+	struct gpio_desc *enable_gpio;
+
+	struct delayed_work work;
+	struct workqueue_struct *workqueue;
 };

 static struct rockchip_hdmi *to_rockchip_hdmi(struct drm_encoder *encoder)
@@ -202,13 +361,830 @@ static const struct dw_hdmi_phy_config rockchip_phy_config[] = {
 	/*pixelclk   symbol   term   vlev*/
 	{ 74250000,  0x8009, 0x0004, 0x0272},
 	{ 148500000, 0x802b, 0x0004, 0x028d},
+	{ 165000000, 0x802b, 0x0004, 0x0209},
 	{ 297000000, 0x8039, 0x0005, 0x028d},
+	{ 594000000, 0x8039, 0x0000, 0x019d},
 	{ ~0UL,	     0x0000, 0x0000, 0x0000}
 };

+enum ROW_INDEX_BPP {
+	ROW_INDEX_6BPP = 0,
+	ROW_INDEX_8BPP,
+	ROW_INDEX_10BPP,
+	ROW_INDEX_12BPP,
+	ROW_INDEX_23BPP,
+	MAX_ROW_INDEX
+};
+
+enum COLUMN_INDEX_BPC {
+	COLUMN_INDEX_8BPC = 0,
+	COLUMN_INDEX_10BPC,
+	COLUMN_INDEX_12BPC,
+	COLUMN_INDEX_14BPC,
+	COLUMN_INDEX_16BPC,
+	MAX_COLUMN_INDEX
+};
+
+#define PPS_TABLE_LEN 8
+#define PPS_BPP_LEN 4
+#define PPS_BPC_LEN 2
+
+struct pps_data {
+	u32 pic_width;
+	u32 pic_height;
+	u32 slice_width;
+	u32 slice_height;
+	bool convert_rgb;
+	u8 bpc;
+	u8 bpp;
+	u8 raw_pps[128];
+};
+
+#if 0
+/*
+ * Selected Rate Control Related Parameter Recommended Values
+ * from DSC_v1.11 spec & C Model release: DSC_model_20161212
+ */
+static struct pps_data pps_datas[PPS_TABLE_LEN] = {
+	{
+		/* 7680x4320/960X96 rgb 8bpc 12bpp */
+		7680, 4320, 960, 96, 1, 8, 192,
+		{
+			0x12, 0x00, 0x00, 0x8d, 0x30, 0xc0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x05, 0xa0,
+			0x01, 0x55, 0x03, 0x90, 0x00, 0x0a, 0x05, 0xc9,
+			0x00, 0xa0, 0x00, 0x0f, 0x01, 0x44, 0x01, 0xaa,
+			0x08, 0x00, 0x10, 0xf4, 0x03, 0x0c, 0x20, 0x00,
+			0x06, 0x0b, 0x0b, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x00, 0x82, 0x00, 0xc0, 0x09, 0x00,
+			0x09, 0x7e, 0x19, 0xbc, 0x19, 0xba, 0x19, 0xf8,
+			0x1a, 0x38, 0x1a, 0x38, 0x1a, 0x76, 0x2a, 0x76,
+			0x2a, 0x76, 0x2a, 0x74, 0x3a, 0xb4, 0x52, 0xf4,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 8bpc 11bpp */
+		7680, 4320, 960, 96, 1, 8, 176,
+		{
+			0x12, 0x00, 0x00, 0x8d, 0x30, 0xb0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x05, 0x28,
+			0x01, 0x74, 0x03, 0x40, 0x00, 0x0f, 0x06, 0xe0,
+			0x00, 0x2d, 0x00, 0x0f, 0x01, 0x44, 0x01, 0x33,
+			0x0f, 0x00, 0x10, 0xf4, 0x03, 0x0c, 0x20, 0x00,
+			0x06, 0x0b, 0x0b, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x00, 0x82, 0x01, 0x00, 0x09, 0x40,
+			0x09, 0xbe, 0x19, 0xfc, 0x19, 0xfa, 0x19, 0xf8,
+			0x1a, 0x38, 0x1a, 0x38, 0x1a, 0x76, 0x2a, 0x76,
+			0x2a, 0x76, 0x2a, 0xb4, 0x3a, 0xb4, 0x52, 0xf4,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 8bpc 10bpp */
+		7680, 4320, 960, 96, 1, 8, 160,
+		{
+			0x12, 0x00, 0x00, 0x8d, 0x30, 0xa0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x04, 0xb0,
+			0x01, 0x9a, 0x02, 0xe0, 0x00, 0x19, 0x09, 0xb0,
+			0x00, 0x12, 0x00, 0x0f, 0x01, 0x44, 0x00, 0xbb,
+			0x16, 0x00, 0x10, 0xec, 0x03, 0x0c, 0x20, 0x00,
+			0x06, 0x0b, 0x0b, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x00, 0xc2, 0x01, 0x00, 0x09, 0x40,
+			0x09, 0xbe, 0x19, 0xfc, 0x19, 0xfa, 0x19, 0xf8,
+			0x1a, 0x38, 0x1a, 0x78, 0x1a, 0x76, 0x2a, 0xb6,
+			0x2a, 0xb6, 0x2a, 0xf4, 0x3a, 0xf4, 0x5b, 0x34,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 8bpc 9bpp */
+		7680, 4320, 960, 96, 1, 8, 144,
+		{
+			0x12, 0x00, 0x00, 0x8d, 0x30, 0x90, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x04, 0x38,
+			0x01, 0xc7, 0x03, 0x16, 0x00, 0x1c, 0x08, 0xc7,
+			0x00, 0x10, 0x00, 0x0f, 0x01, 0x44, 0x00, 0xaa,
+			0x17, 0x00, 0x10, 0xf1, 0x03, 0x0c, 0x20, 0x00,
+			0x06, 0x0b, 0x0b, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x00, 0xc2, 0x01, 0x00, 0x09, 0x40,
+			0x09, 0xbe, 0x19, 0xfc, 0x19, 0xfa, 0x19, 0xf8,
+			0x1a, 0x38, 0x1a, 0x78, 0x1a, 0x76, 0x2a, 0xb6,
+			0x2a, 0xb6, 0x2a, 0xf4, 0x3a, 0xf4, 0x63, 0x74,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 10bpc 12bpp */
+		7680, 4320, 960, 96, 1, 10, 192,
+		{
+			0x12, 0x00, 0x00, 0xad, 0x30, 0xc0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x05, 0xa0,
+			0x01, 0x55, 0x03, 0x90, 0x00, 0x0a, 0x05, 0xc9,
+			0x00, 0xa0, 0x00, 0x0f, 0x01, 0x44, 0x01, 0xaa,
+			0x08, 0x00, 0x10, 0xf4, 0x07, 0x10, 0x20, 0x00,
+			0x06, 0x0f, 0x0f, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x01, 0x02, 0x11, 0x80, 0x22, 0x00,
+			0x22, 0x7e, 0x32, 0xbc, 0x32, 0xba, 0x3a, 0xf8,
+			0x3b, 0x38, 0x3b, 0x38, 0x3b, 0x76, 0x4b, 0x76,
+			0x4b, 0x76, 0x4b, 0x74, 0x5b, 0xb4, 0x73, 0xf4,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 10bpc 11bpp */
+		7680, 4320, 960, 96, 1, 10, 176,
+		{
+			0x12, 0x00, 0x00, 0xad, 0x30, 0xb0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x05, 0x28,
+			0x01, 0x74, 0x03, 0x40, 0x00, 0x0f, 0x06, 0xe0,
+			0x00, 0x2d, 0x00, 0x0f, 0x01, 0x44, 0x01, 0x33,
+			0x0f, 0x00, 0x10, 0xf4, 0x07, 0x10, 0x20, 0x00,
+			0x06, 0x0f, 0x0f, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x01, 0x42, 0x19, 0xc0, 0x2a, 0x40,
+			0x2a, 0xbe, 0x3a, 0xfc, 0x3a, 0xfa, 0x3a, 0xf8,
+			0x3b, 0x38, 0x3b, 0x38, 0x3b, 0x76, 0x4b, 0x76,
+			0x4b, 0x76, 0x4b, 0xb4, 0x5b, 0xb4, 0x73, 0xf4,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 10bpc 10bpp */
+		7680, 4320, 960, 96, 1, 10, 160,
+		{
+			0x12, 0x00, 0x00, 0xad, 0x30, 0xa0, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x04, 0xb0,
+			0x01, 0x9a, 0x02, 0xe0, 0x00, 0x19, 0x09, 0xb0,
+			0x00, 0x12, 0x00, 0x0f, 0x01, 0x44, 0x00, 0xbb,
+			0x16, 0x00, 0x10, 0xec, 0x07, 0x10, 0x20, 0x00,
+			0x06, 0x0f, 0x0f, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x01, 0xc2, 0x22, 0x00, 0x2a, 0x40,
+			0x2a, 0xbe, 0x3a, 0xfc, 0x3a, 0xfa, 0x3a, 0xf8,
+			0x3b, 0x38, 0x3b, 0x78, 0x3b, 0x76, 0x4b, 0xb6,
+			0x4b, 0xb6, 0x4b, 0xf4, 0x63, 0xf4, 0x7c, 0x34,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+	{
+		/* 7680x4320/960X96 rgb 10bpc 9bpp */
+		7680, 4320, 960, 96, 1, 10, 144,
+		{
+			0x12, 0x00, 0x00, 0xad, 0x30, 0x90, 0x10, 0xe0,
+			0x1e, 0x00, 0x00, 0x60, 0x03, 0xc0, 0x04, 0x38,
+			0x01, 0xc7, 0x03, 0x16, 0x00, 0x1c, 0x08, 0xc7,
+			0x00, 0x10, 0x00, 0x0f, 0x01, 0x44, 0x00, 0xaa,
+			0x17, 0x00, 0x10, 0xf1, 0x07, 0x10, 0x20, 0x00,
+			0x06, 0x0f, 0x0f, 0x33, 0x0e, 0x1c, 0x2a, 0x38,
+			0x46, 0x54, 0x62, 0x69, 0x70, 0x77, 0x79, 0x7b,
+			0x7d, 0x7e, 0x01, 0xc2, 0x22, 0x00, 0x2a, 0x40,
+			0x2a, 0xbe, 0x3a, 0xfc, 0x3a, 0xfa, 0x3a, 0xf8,
+			0x3b, 0x38, 0x3b, 0x78, 0x3b, 0x76, 0x4b, 0xb6,
+			0x4b, 0xb6, 0x4b, 0xf4, 0x63, 0xf4, 0x84, 0x74,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+		},
+	},
+};
+
+static bool hdmi_bus_fmt_is_rgb(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv444(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+#endif
+
+static bool hdmi_bus_fmt_is_yuv422(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv420(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		return true;
+
+	default:
+	return false;
+	}
+}
+
+static int hdmi_bus_fmt_color_depth(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+		return 8;
+
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		return 10;
+
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+		return 12;
+
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		return 16;
+
+	default:
+		return 0;
+	}
+}
+
+static unsigned int
+hdmi_get_tmdsclock(struct rockchip_hdmi *hdmi, unsigned long pixelclock)
+{
+	unsigned int tmdsclock = pixelclock;
+	unsigned int depth =
+		hdmi_bus_fmt_color_depth(hdmi->output_bus_format);
+
+	if (!hdmi_bus_fmt_is_yuv422(hdmi->output_bus_format)) {
+		switch (depth) {
+		case 16:
+			tmdsclock = pixelclock * 2;
+			break;
+		case 12:
+			tmdsclock = pixelclock * 3 / 2;
+			break;
+		case 10:
+			tmdsclock = pixelclock * 5 / 4;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return tmdsclock;
+}
+
+static int rockchip_hdmi_match_by_id(struct device *dev, const void *data)
+{
+	struct rockchip_hdmi *hdmi = dev_get_drvdata(dev);
+	const unsigned int *id = data;
+
+	return hdmi->id == *id;
+}
+
+static struct rockchip_hdmi *
+rockchip_hdmi_find_by_id(struct device_driver *drv, unsigned int id)
+{
+	struct device *dev;
+
+	dev = driver_find_device(drv, NULL, &id, rockchip_hdmi_match_by_id);
+	if (!dev)
+		return NULL;
+
+	return dev_get_drvdata(dev);
+}
+
+static void hdmi_select_link_config(struct rockchip_hdmi *hdmi,
+				    struct drm_crtc_state *crtc_state,
+				    unsigned int tmdsclk)
+{
+	struct drm_display_mode mode;
+	int max_lanes, max_rate_per_lane;
+	// int max_dsc_lanes, max_dsc_rate_per_lane;
+	unsigned long max_frl_rate;
+
+	drm_mode_copy(&mode, &crtc_state->mode);
+
+	max_lanes = hdmi->max_lanes;
+	max_rate_per_lane = hdmi->max_frl_rate_per_lane;
+	max_frl_rate = max_lanes * max_rate_per_lane * 1000000;
+
+	hdmi->link_cfg.dsc_mode = false;
+	hdmi->link_cfg.frl_lanes = max_lanes;
+	hdmi->link_cfg.rate_per_lane = max_rate_per_lane;
+
+	if (!max_frl_rate || (tmdsclk < HDMI20_MAX_RATE && mode.clock < HDMI20_MAX_RATE)) {
+		hdmi->link_cfg.frl_mode = false;
+		return;
+	}
+
+	hdmi->link_cfg.frl_mode = true;
+	dev_warn(hdmi->dev, "use unsupported frl hdmi mode\n");
+
+	// if (!hdmi->dsc_cap.v_1p2)
+	// 	return;
+	//
+	// max_dsc_lanes = hdmi->dsc_cap.max_lanes;
+	// max_dsc_rate_per_lane =
+	// 	hdmi->dsc_cap.max_frl_rate_per_lane;
+	//
+	// if (mode.clock >= HDMI_8K60_RATE &&
+	//     !hdmi_bus_fmt_is_yuv420(hdmi->bus_format) &&
+	//     !hdmi_bus_fmt_is_yuv422(hdmi->bus_format)) {
+	// 	hdmi->link_cfg.dsc_mode = true;
+	// 	hdmi->link_cfg.frl_lanes = max_dsc_lanes;
+	// 	hdmi->link_cfg.rate_per_lane = max_dsc_rate_per_lane;
+	// } else {
+	// 	hdmi->link_cfg.dsc_mode = false;
+	// 	hdmi->link_cfg.frl_lanes = max_lanes;
+	// 	hdmi->link_cfg.rate_per_lane = max_rate_per_lane;
+	// }
+}
+
+/////////////////////////////////////////////////////////////////////////////////////
+/* CC: disable DSC */
+#if 0
+static int hdmi_dsc_get_slice_height(int vactive)
+{
+	int slice_height;
+
+	/*
+	 * Slice Height determination : HDMI2.1 Section 7.7.5.2
+	 * Select smallest slice height >=96, that results in a valid PPS and
+	 * requires minimum padding lines required for final slice.
+	 *
+	 * Assumption : Vactive is even.
+	 */
+	for (slice_height = 96; slice_height <= vactive; slice_height += 2)
+		if (vactive % slice_height == 0)
+			return slice_height;
+
+	return 0;
+}
+
+static int hdmi_dsc_get_num_slices(struct rockchip_hdmi *hdmi,
+				   struct drm_crtc_state *crtc_state,
+				   int src_max_slices, int src_max_slice_width,
+				   int hdmi_max_slices, int hdmi_throughput)
+{
+/* Pixel rates in KPixels/sec */
+#define HDMI_DSC_PEAK_PIXEL_RATE		2720000
+/*
+ * Rates at which the source and sink are required to process pixels in each
+ * slice, can be two levels: either at least 340000KHz or at least 40000KHz.
+ */
+#define HDMI_DSC_MAX_ENC_THROUGHPUT_0		340000
+#define HDMI_DSC_MAX_ENC_THROUGHPUT_1		400000
+
+/* Spec limits the slice width to 2720 pixels */
+#define MAX_HDMI_SLICE_WIDTH			2720
+	int kslice_adjust;
+	int adjusted_clk_khz;
+	int min_slices;
+	int target_slices;
+	int max_throughput; /* max clock freq. in khz per slice */
+	int max_slice_width;
+	int slice_width;
+	int pixel_clock = crtc_state->mode.clock;
+
+	if (!hdmi_throughput)
+		return 0;
+
+	/*
+	 * Slice Width determination : HDMI2.1 Section 7.7.5.1
+	 * kslice_adjust factor for 4:2:0, and 4:2:2 formats is 0.5, where as
+	 * for 4:4:4 is 1.0. Multiplying these factors by 10 and later
+	 * dividing adjusted clock value by 10.
+	 */
+	if (hdmi_bus_fmt_is_yuv444(hdmi->output_bus_format) ||
+	    hdmi_bus_fmt_is_rgb(hdmi->output_bus_format))
+		kslice_adjust = 10;
+	else
+		kslice_adjust = 5;
+
+	/*
+	 * As per spec, the rate at which the source and the sink process
+	 * the pixels per slice are at two levels: at least 340Mhz or 400Mhz.
+	 * This depends upon the pixel clock rate and output formats
+	 * (kslice adjust).
+	 * If pixel clock * kslice adjust >= 2720MHz slices can be processed
+	 * at max 340MHz, otherwise they can be processed at max 400MHz.
+	 */
+
+	adjusted_clk_khz = DIV_ROUND_UP(kslice_adjust * pixel_clock, 10);
+
+	if (adjusted_clk_khz <= HDMI_DSC_PEAK_PIXEL_RATE)
+		max_throughput = HDMI_DSC_MAX_ENC_THROUGHPUT_0;
+	else
+		max_throughput = HDMI_DSC_MAX_ENC_THROUGHPUT_1;
+
+	/*
+	 * Taking into account the sink's capability for maximum
+	 * clock per slice (in MHz) as read from HF-VSDB.
+	 */
+	max_throughput = min(max_throughput, hdmi_throughput * 1000);
+
+	min_slices = DIV_ROUND_UP(adjusted_clk_khz, max_throughput);
+	max_slice_width = min(MAX_HDMI_SLICE_WIDTH, src_max_slice_width);
+
+	/*
+	 * Keep on increasing the num of slices/line, starting from min_slices
+	 * per line till we get such a number, for which the slice_width is
+	 * just less than max_slice_width. The slices/line selected should be
+	 * less than or equal to the max horizontal slices that the combination
+	 * of PCON encoder and HDMI decoder can support.
+	 */
+	do {
+		if (min_slices <= 1 && src_max_slices >= 1 && hdmi_max_slices >= 1)
+			target_slices = 1;
+		else if (min_slices <= 2 && src_max_slices >= 2 && hdmi_max_slices >= 2)
+			target_slices = 2;
+		else if (min_slices <= 4 && src_max_slices >= 4 && hdmi_max_slices >= 4)
+			target_slices = 4;
+		else if (min_slices <= 8 && src_max_slices >= 8 && hdmi_max_slices >= 8)
+			target_slices = 8;
+		else if (min_slices <= 12 && src_max_slices >= 12 && hdmi_max_slices >= 12)
+			target_slices = 12;
+		else if (min_slices <= 16 && src_max_slices >= 16 && hdmi_max_slices >= 16)
+			target_slices = 16;
+		else
+			return 0;
+
+		slice_width = DIV_ROUND_UP(crtc_state->mode.hdisplay, target_slices);
+		if (slice_width > max_slice_width)
+			min_slices = target_slices + 1;
+	} while (slice_width > max_slice_width);
+
+	return target_slices;
+}
+
+static int hdmi_dsc_slices(struct rockchip_hdmi *hdmi,
+			   struct drm_crtc_state *crtc_state)
+{
+	int hdmi_throughput = hdmi->dsc_cap.clk_per_slice;
+	int hdmi_max_slices = hdmi->dsc_cap.max_slices;
+	int rk_max_slices = 8;
+	int rk_max_slice_width = 2048;
+
+	return hdmi_dsc_get_num_slices(hdmi, crtc_state, rk_max_slices,
+				       rk_max_slice_width,
+				       hdmi_max_slices, hdmi_throughput);
+}
+
+static int
+hdmi_dsc_get_bpp(struct rockchip_hdmi *hdmi, int src_fractional_bpp,
+		 int slice_width, int num_slices, bool hdmi_all_bpp,
+		 int hdmi_max_chunk_bytes)
+{
+	int max_dsc_bpp, min_dsc_bpp;
+	int target_bytes;
+	bool bpp_found = false;
+	int bpp_decrement_x16;
+	int bpp_target;
+	int bpp_target_x16;
+
+	/*
+	 * Get min bpp and max bpp as per Table 7.23, in HDMI2.1 spec
+	 * Start with the max bpp and keep on decrementing with
+	 * fractional bpp, if supported by PCON DSC encoder
+	 *
+	 * for each bpp we check if no of bytes can be supported by HDMI sink
+	 */
+
+	/* only 9\10\12 bpp was tested */
+	min_dsc_bpp = 9;
+	max_dsc_bpp = 12;
+
+	/*
+	 * Taking into account if all dsc_all_bpp supported by HDMI2.1 sink
+	 * Section 7.7.34 : Source shall not enable compressed Video
+	 * Transport with bpp_target settings above 12 bpp unless
+	 * DSC_all_bpp is set to 1.
+	 */
+	if (!hdmi_all_bpp)
+		max_dsc_bpp = min(max_dsc_bpp, 12);
+
+	/*
+	 * The Sink has a limit of compressed data in bytes for a scanline,
+	 * as described in max_chunk_bytes field in HFVSDB block of edid.
+	 * The no. of bytes depend on the target bits per pixel that the
+	 * source configures. So we start with the max_bpp and calculate
+	 * the target_chunk_bytes. We keep on decrementing the target_bpp,
+	 * till we get the target_chunk_bytes just less than what the sink's
+	 * max_chunk_bytes, or else till we reach the min_dsc_bpp.
+	 *
+	 * The decrement is according to the fractional support from PCON DSC
+	 * encoder. For fractional BPP we use bpp_target as a multiple of 16.
+	 *
+	 * bpp_target_x16 = bpp_target * 16
+	 * So we need to decrement by {1, 2, 4, 8, 16} for fractional bpps
+	 * {1/16, 1/8, 1/4, 1/2, 1} respectively.
+	 */
+
+	bpp_target = max_dsc_bpp;
+
+	/* src does not support fractional bpp implies decrement by 16 for bppx16 */
+	if (!src_fractional_bpp)
+		src_fractional_bpp = 1;
+	bpp_decrement_x16 = DIV_ROUND_UP(16, src_fractional_bpp);
+	bpp_target_x16 = bpp_target * 16;
+
+	while (bpp_target_x16 > (min_dsc_bpp * 16)) {
+		int bpp;
+
+		bpp = DIV_ROUND_UP(bpp_target_x16, 16);
+		target_bytes = DIV_ROUND_UP((num_slices * slice_width * bpp), 8);
+		if (target_bytes <= hdmi_max_chunk_bytes) {
+			bpp_found = true;
+			break;
+		}
+		bpp_target_x16 -= bpp_decrement_x16;
+	}
+	if (bpp_found)
+		return bpp_target_x16;
+
+	return 0;
+}
+
+static int
+dw_hdmi_dsc_bpp(struct rockchip_hdmi *hdmi,
+		int num_slices, int slice_width)
+{
+	bool hdmi_all_bpp = hdmi->dsc_cap.all_bpp;
+	int fractional_bpp = 0;
+	int hdmi_max_chunk_bytes = hdmi->dsc_cap.total_chunk_kbytes * 1024;
+
+	return hdmi_dsc_get_bpp(hdmi, fractional_bpp, slice_width,
+				num_slices, hdmi_all_bpp,
+				hdmi_max_chunk_bytes);
+}
+
+static int dw_hdmi_qp_set_link_cfg(struct rockchip_hdmi *hdmi,
+				   u16 pic_width, u16 pic_height,
+				   u16 slice_width, u16 slice_height,
+				   u16 bits_per_pixel, u8 bits_per_component)
+{
+	int i;
+
+	for (i = 0; i < PPS_TABLE_LEN; i++)
+		if (pic_width == pps_datas[i].pic_width &&
+		    pic_height == pps_datas[i].pic_height &&
+		    slice_width == pps_datas[i].slice_width &&
+		    slice_height == pps_datas[i].slice_height &&
+		    bits_per_component == pps_datas[i].bpc &&
+		    bits_per_pixel == pps_datas[i].bpp &&
+		    hdmi_bus_fmt_is_rgb(hdmi->output_bus_format) == pps_datas[i].convert_rgb)
+			break;
+
+	if (i == PPS_TABLE_LEN) {
+		dev_err(hdmi->dev, "can't find pps cfg!\n");
+		return -EINVAL;
+	}
+
+	memcpy(hdmi->link_cfg.pps_payload, pps_datas[i].raw_pps, 128);
+	hdmi->link_cfg.hcactive = DIV_ROUND_UP(slice_width * (bits_per_pixel / 16), 8) *
+		(pic_width / slice_width);
+
+	return 0;
+}
+
+static void dw_hdmi_qp_dsc_configure(struct rockchip_hdmi *hdmi,
+				     struct rockchip_crtc_state *s,
+				     struct drm_crtc_state *crtc_state)
+{
+	int ret;
+	int slice_height;
+	int slice_width;
+	int bits_per_pixel;
+	int slice_count;
+	bool hdmi_is_dsc_1_2;
+	unsigned int depth = hdmi_bus_fmt_color_depth(hdmi->output_bus_format);
+
+	if (!crtc_state)
+		return;
+
+	hdmi_is_dsc_1_2 = hdmi->dsc_cap.v_1p2;
+
+	if (!hdmi_is_dsc_1_2)
+		return;
+
+	slice_height = hdmi_dsc_get_slice_height(crtc_state->mode.vdisplay);
+	if (!slice_height)
+		return;
+
+	slice_count = hdmi_dsc_slices(hdmi, crtc_state);
+	if (!slice_count)
+		return;
+
+	slice_width = DIV_ROUND_UP(crtc_state->mode.hdisplay, slice_count);
+
+	bits_per_pixel = dw_hdmi_dsc_bpp(hdmi, slice_count, slice_width);
+	if (!bits_per_pixel)
+		return;
+
+	ret = dw_hdmi_qp_set_link_cfg(hdmi, crtc_state->mode.hdisplay,
+				      crtc_state->mode.vdisplay, slice_width,
+				      slice_height, bits_per_pixel, depth);
+
+	if (ret) {
+		dev_err(hdmi->dev, "set vdsc cfg failed\n");
+		return;
+	}
+	dev_info(hdmi->dev, "dsc_enable\n");
+	s->dsc_enable = 1;
+	s->dsc_sink_cap.version_major = 1;
+	s->dsc_sink_cap.version_minor = 2;
+	s->dsc_sink_cap.slice_width = slice_width;
+	s->dsc_sink_cap.slice_height = slice_height;
+	s->dsc_sink_cap.target_bits_per_pixel_x16 = bits_per_pixel;
+	s->dsc_sink_cap.block_pred = 1;
+	s->dsc_sink_cap.native_420 = 0;
+
+	memcpy(&s->pps, hdmi->link_cfg.pps_payload, 128);
+}
+#endif
+/////////////////////////////////////////////////////////////////////////////////////////
+
+// static int rockchip_hdmi_update_phy_table(struct rockchip_hdmi *hdmi,
+// 					  u32 *config,
+// 					  int phy_table_size)
+// {
+// 	int i;
+//
+// 	if (phy_table_size > ARRAY_SIZE(rockchip_phy_config)) {
+// 		dev_err(hdmi->dev, "phy table array number is out of range\n");
+// 		return -E2BIG;
+// 	}
+//
+// 	for (i = 0; i < phy_table_size; i++) {
+// 		if (config[i * 4] != 0)
+// 			rockchip_phy_config[i].mpixelclock = (u64)config[i * 4];
+// 		else
+// 			rockchip_phy_config[i].mpixelclock = ~0UL;
+// 		rockchip_phy_config[i].sym_ctr = (u16)config[i * 4 + 1];
+// 		rockchip_phy_config[i].term = (u16)config[i * 4 + 2];
+// 		rockchip_phy_config[i].vlev_ctr = (u16)config[i * 4 + 3];
+// 	}
+//
+// 	return 0;
+// }
+
+static void repo_hpd_event(struct work_struct *p_work)
+{
+	struct rockchip_hdmi *hdmi = container_of(p_work, struct rockchip_hdmi, work.work);
+	bool change;
+
+	change = drm_helper_hpd_irq_event(hdmi->drm_dev);
+	if (change) {
+		dev_dbg(hdmi->dev, "hpd stat changed:%d\n", hdmi->hpd_stat);
+		// dw_hdmi_qp_cec_set_hpd(hdmi->hdmi_qp, hdmi->hpd_stat, change);
+	}
+}
+
+static irqreturn_t rockchip_hdmi_hardirq(int irq, void *dev_id)
+{
+	struct rockchip_hdmi *hdmi = dev_id;
+	u32 intr_stat, val;
+
+	regmap_read(hdmi->regmap, RK3588_GRF_SOC_STATUS1, &intr_stat);
+
+	if (intr_stat) {
+		dev_dbg(hdmi->dev, "hpd irq %#x\n", intr_stat);
+
+		if (!hdmi->id)
+			val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK,
+					    RK3588_HDMI0_HPD_INT_MSK);
+		else
+			val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK,
+					    RK3588_HDMI1_HPD_INT_MSK);
+		regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
+		return IRQ_WAKE_THREAD;
+	}
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t rockchip_hdmi_irq(int irq, void *dev_id)
+{
+	struct rockchip_hdmi *hdmi = dev_id;
+	u32 intr_stat, val;
+	int msecs;
+	bool stat;
+
+	regmap_read(hdmi->regmap, RK3588_GRF_SOC_STATUS1, &intr_stat);
+
+	if (!intr_stat)
+		return IRQ_NONE;
+
+	if (!hdmi->id) {
+		val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR,
+				    RK3588_HDMI0_HPD_INT_CLR);
+		if (intr_stat & RK3588_HDMI0_LEVEL_INT)
+			stat = true;
+		else
+			stat = false;
+	} else {
+		val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR,
+				    RK3588_HDMI1_HPD_INT_CLR);
+		if (intr_stat & RK3588_HDMI1_LEVEL_INT)
+			stat = true;
+		else
+			stat = false;
+	}
+
+	regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
+
+	if (stat) {
+		hdmi->hpd_stat = true;
+		msecs = 150;
+	} else {
+		hdmi->hpd_stat = false;
+		msecs = 20;
+	}
+	mod_delayed_work(hdmi->workqueue, &hdmi->work, msecs_to_jiffies(msecs));
+
+	if (!hdmi->id) {
+		val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR,
+				    RK3588_HDMI0_HPD_INT_CLR) |
+		      HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK);
+	} else {
+		val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR,
+				    RK3588_HDMI1_HPD_INT_CLR) |
+		      HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK);
+	}
+
+	regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
+
+	return IRQ_HANDLED;
+}
+
 static int rockchip_hdmi_parse_dt(struct rockchip_hdmi *hdmi)
 {
 	struct device_node *np = hdmi->dev->of_node;
+	int ret;

 	hdmi->regmap = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
 	if (IS_ERR(hdmi->regmap)) {
@@ -216,6 +1192,14 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi *hdmi)
 		return PTR_ERR(hdmi->regmap);
 	}

+	if (hdmi->is_hdmi_qp) {
+		hdmi->vo1_regmap = syscon_regmap_lookup_by_phandle(np, "rockchip,vo1_grf");
+		if (IS_ERR(hdmi->vo1_regmap)) {
+			DRM_DEV_ERROR(hdmi->dev, "Unable to get rockchip,vo1_grf\n");
+			return PTR_ERR(hdmi->vo1_regmap);
+		}
+	}
+
 	hdmi->ref_clk = devm_clk_get_optional(hdmi->dev, "ref");
 	if (!hdmi->ref_clk)
 		hdmi->ref_clk = devm_clk_get_optional(hdmi->dev, "vpll");
@@ -245,6 +1229,79 @@ static int rockchip_hdmi_parse_dt(struct rockchip_hdmi *hdmi)
 	if (IS_ERR(hdmi->avdd_1v8))
 		return PTR_ERR(hdmi->avdd_1v8);

+	hdmi->hclk_vio = devm_clk_get(hdmi->dev, "hclk_vio");
+	if (PTR_ERR(hdmi->hclk_vio) == -ENOENT) {
+		hdmi->hclk_vio = NULL;
+	} else if (PTR_ERR(hdmi->hclk_vio) == -EPROBE_DEFER) {
+		return -EPROBE_DEFER;
+	} else if (IS_ERR(hdmi->hclk_vio)) {
+		dev_err(hdmi->dev, "failed to get hclk_vio clock\n");
+		return PTR_ERR(hdmi->hclk_vio);
+	}
+
+	hdmi->hclk_vop = devm_clk_get(hdmi->dev, "hclk");
+	if (PTR_ERR(hdmi->hclk_vop) == -ENOENT) {
+		hdmi->hclk_vop = NULL;
+	} else if (PTR_ERR(hdmi->hclk_vop) == -EPROBE_DEFER) {
+		return -EPROBE_DEFER;
+	} else if (IS_ERR(hdmi->hclk_vop)) {
+		dev_err(hdmi->dev, "failed to get hclk_vop clock\n");
+		return PTR_ERR(hdmi->hclk_vop);
+	}
+
+	hdmi->aud_clk = devm_clk_get_optional(hdmi->dev, "aud");
+	if (IS_ERR(hdmi->aud_clk)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->aud_clk),
+			      "failed to get aud_clk clock\n");
+		return PTR_ERR(hdmi->aud_clk);
+	}
+
+	hdmi->hpd_clk = devm_clk_get_optional(hdmi->dev, "hpd");
+	if (IS_ERR(hdmi->hpd_clk)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->hpd_clk),
+			      "failed to get hpd_clk clock\n");
+		return PTR_ERR(hdmi->hpd_clk);
+	}
+
+	hdmi->hclk_vo1 = devm_clk_get_optional(hdmi->dev, "hclk_vo1");
+	if (IS_ERR(hdmi->hclk_vo1)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->hclk_vo1),
+			      "failed to get hclk_vo1 clock\n");
+		return PTR_ERR(hdmi->hclk_vo1);
+	}
+
+	hdmi->earc_clk = devm_clk_get_optional(hdmi->dev, "earc");
+	if (IS_ERR(hdmi->earc_clk)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->earc_clk),
+			      "failed to get earc_clk clock\n");
+		return PTR_ERR(hdmi->earc_clk);
+	}
+
+	hdmi->hdmitx_ref = devm_clk_get_optional(hdmi->dev, "hdmitx_ref");
+	if (IS_ERR(hdmi->hdmitx_ref)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->hdmitx_ref),
+			      "failed to get hdmitx_ref clock\n");
+		return PTR_ERR(hdmi->hdmitx_ref);
+	}
+
+	hdmi->pclk = devm_clk_get_optional(hdmi->dev, "pclk");
+	if (IS_ERR(hdmi->pclk)) {
+		dev_err_probe(hdmi->dev, PTR_ERR(hdmi->pclk),
+			      "failed to get pclk clock\n");
+		return PTR_ERR(hdmi->pclk);
+	}
+
+	hdmi->enable_gpio = devm_gpiod_get_optional(hdmi->dev, "enable",
+						    GPIOD_OUT_HIGH);
+	if (IS_ERR(hdmi->enable_gpio)) {
+		ret = PTR_ERR(hdmi->enable_gpio);
+		dev_err(hdmi->dev, "failed to request enable GPIO: %d\n", ret);
+		return ret;
+	}
+
+	hdmi->skip_check_420_mode =
+		of_property_read_bool(np, "skip-check-420-mode");
+
 	return 0;
 }

@@ -283,9 +1340,114 @@ dw_hdmi_rockchip_mode_valid(struct dw_hdmi *dw_hdmi, void *data,

 	return MODE_BAD;
 }
+/* [CC:] enable downstream mode_valid() */
+// static enum drm_mode_status
+// dw_hdmi_rockchip_mode_valid(struct drm_connector *connector, void *data,
+// 			    const struct drm_display_info *info,
+// 			    const struct drm_display_mode *mode)
+// {
+// 	struct drm_encoder *encoder = connector->encoder;
+// 	enum drm_mode_status status = MODE_OK;
+// 	struct drm_device *dev = connector->dev;
+// 	struct rockchip_drm_private *priv = dev->dev_private;
+// 	struct drm_crtc *crtc;
+// 	struct rockchip_hdmi *hdmi;
+//
+// 	/*
+// 	 * Pixel clocks we support are always < 2GHz and so fit in an
+// 	 * int.  We should make sure source rate does too so we don't get
+// 	 * overflow when we multiply by 1000.
+// 	 */
+// 	if (mode->clock > INT_MAX / 1000)
+// 		return MODE_BAD;
+//
+// 	if (!encoder) {
+// 		const struct drm_connector_helper_funcs *funcs;
+//
+// 		funcs = connector->helper_private;
+// 		if (funcs->atomic_best_encoder)
+// 			encoder = funcs->atomic_best_encoder(connector,
+// 							     connector->state);
+// 		else
+// 			encoder = funcs->best_encoder(connector);
+// 	}
+//
+// 	if (!encoder || !encoder->possible_crtcs)
+// 		return MODE_BAD;
+//
+// 	hdmi = to_rockchip_hdmi(encoder);
+//
+// 	/*
+// 	 * If sink max TMDS clock < 340MHz, we should check the mode pixel
+// 	 * clock > 340MHz is YCbCr420 or not and whether the platform supports
+// 	 * YCbCr420.
+// 	 */
+// 	if (!hdmi->skip_check_420_mode) {
+// 		if (mode->clock > 340000 &&
+// 		    connector->display_info.max_tmds_clock < 340000 &&
+// 		    (!drm_mode_is_420(&connector->display_info, mode) ||
+// 		     !connector->ycbcr_420_allowed))
+// 			return MODE_BAD;
+//
+// 		if (hdmi->max_tmdsclk <= 340000 && mode->clock > 340000 &&
+// 		    !drm_mode_is_420(&connector->display_info, mode))
+// 			return MODE_BAD;
+// 	};
+//
+// 	if (hdmi->phy) {
+// 		if (hdmi->is_hdmi_qp)
+// 			phy_set_bus_width(hdmi->phy, mode->clock * 10);
+// 		else
+// 			phy_set_bus_width(hdmi->phy, 8);
+// 	}
+//
+// 	/*
+// 	 * ensure all drm display mode can work, if someone want support more
+// 	 * resolutions, please limit the possible_crtc, only connect to
+// 	 * needed crtc.
+// 	 */
+// 	drm_for_each_crtc(crtc, connector->dev) {
+// 		int pipe = drm_crtc_index(crtc);
+// 		const struct rockchip_crtc_funcs *funcs =
+// 						priv->crtc_funcs[pipe];
+//
+// 		if (!(encoder->possible_crtcs & drm_crtc_mask(crtc)))
+// 			continue;
+// 		if (!funcs || !funcs->mode_valid)
+// 			continue;
+//
+// 		status = funcs->mode_valid(crtc, mode,
+// 					   DRM_MODE_CONNECTOR_HDMIA);
+// 		if (status != MODE_OK)
+// 			return status;
+// 	}
+//
+// 	return status;
+// }
+//

 static void dw_hdmi_rockchip_encoder_disable(struct drm_encoder *encoder)
 {
+	struct rockchip_hdmi *hdmi = to_rockchip_hdmi(encoder);
+	// struct drm_crtc *crtc = encoder->crtc;
+	// struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc->state);
+	//
+	// if (crtc->state->active_changed) {
+	// 	if (hdmi->plat_data->split_mode) {
+	// 		s->output_if &= ~(VOP_OUTPUT_IF_HDMI0 | VOP_OUTPUT_IF_HDMI1);
+	// 	} else {
+	// 		if (!hdmi->id)
+	// 			s->output_if &= ~VOP_OUTPUT_IF_HDMI1;
+	// 		else
+	// 			s->output_if &= ~VOP_OUTPUT_IF_HDMI0;
+	// 	}
+	// }
+	/*
+	 * when plug out hdmi it will be switch cvbs and then phy bus width
+	 * must be set as 8
+	 */
+	if (hdmi->phy)
+		phy_set_bus_width(hdmi->phy, 8);
 }

 static bool
@@ -301,6 +1463,27 @@ static void dw_hdmi_rockchip_encoder_mode_set(struct drm_encoder *encoder,
 					      struct drm_display_mode *adj_mode)
 {
 	struct rockchip_hdmi *hdmi = to_rockchip_hdmi(encoder);
+	struct drm_crtc *crtc;
+	struct rockchip_crtc_state *s;
+
+	if (!encoder->crtc)
+		return;
+	crtc = encoder->crtc;
+
+	if (!crtc->state)
+		return;
+	s = to_rockchip_crtc_state(crtc->state);
+
+	if (!s)
+		return;
+
+	if (hdmi->is_hdmi_qp) {
+		// s->dsc_enable = 0;
+		// if (hdmi->link_cfg.dsc_mode)
+		// 	dw_hdmi_qp_dsc_configure(hdmi, s, crtc->state);
+
+		phy_set_bus_width(hdmi->phy, hdmi->phy_bus_width);
+	}

 	clk_set_rate(hdmi->ref_clk, adj_mode->clock * 1000);
 }
@@ -308,14 +1491,25 @@ static void dw_hdmi_rockchip_encoder_mode_set(struct drm_encoder *encoder,
 static void dw_hdmi_rockchip_encoder_enable(struct drm_encoder *encoder)
 {
 	struct rockchip_hdmi *hdmi = to_rockchip_hdmi(encoder);
+	struct drm_crtc *crtc = encoder->crtc;
 	u32 val;
+	int mux;
 	int ret;

+	if (WARN_ON(!crtc || !crtc->state))
+		return;
+
+	if (hdmi->phy)
+		phy_set_bus_width(hdmi->phy, hdmi->phy_bus_width);
+
+	clk_set_rate(hdmi->ref_clk,
+		     crtc->state->adjusted_mode.crtc_clock * 1000);
+
 	if (hdmi->chip_data->lcdsel_grf_reg < 0)
 		return;

-	ret = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
-	if (ret)
+	mux = drm_of_encoder_active_endpoint_id(hdmi->dev->of_node, encoder);
+	if (mux)
 		val = hdmi->chip_data->lcdsel_lit;
 	else
 		val = hdmi->chip_data->lcdsel_big;
@@ -330,24 +1524,992 @@ static void dw_hdmi_rockchip_encoder_enable(struct drm_encoder *encoder)
 	if (ret != 0)
 		DRM_DEV_ERROR(hdmi->dev, "Could not write to GRF: %d\n", ret);

+	if (hdmi->chip_data->lcdsel_grf_reg == RK3288_GRF_SOC_CON6) {
+		struct rockchip_crtc_state *s =
+				to_rockchip_crtc_state(crtc->state);
+		u32 mode_mask = mux ? RK3288_HDMI_LCDC1_YUV420 :
+					RK3288_HDMI_LCDC0_YUV420;
+
+		if (s->output_mode == ROCKCHIP_OUT_MODE_YUV420)
+			val = HIWORD_UPDATE(mode_mask, mode_mask);
+		else
+			val = HIWORD_UPDATE(0, mode_mask);
+
+		regmap_write(hdmi->regmap, RK3288_GRF_SOC_CON16, val);
+	}
+
 	clk_disable_unprepare(hdmi->grf_clk);
 	DRM_DEV_DEBUG(hdmi->dev, "vop %s output to hdmi\n",
 		      ret ? "LIT" : "BIG");
 }

-static int
-dw_hdmi_rockchip_encoder_atomic_check(struct drm_encoder *encoder,
-				      struct drm_crtc_state *crtc_state,
-				      struct drm_connector_state *conn_state)
+static void rk3588_set_link_mode(struct rockchip_hdmi *hdmi)
 {
-	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+	int val;
+	bool is_hdmi0;

-	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
-	s->output_type = DRM_MODE_CONNECTOR_HDMIA;
+	if (!hdmi->id)
+		is_hdmi0 = true;
+	else
+		is_hdmi0 = false;
+
+	if (!hdmi->link_cfg.frl_mode) {
+		val = HIWORD_UPDATE(0, RK3588_HDMI21_MASK);
+		if (is_hdmi0)
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON4, val);
+		else
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON7, val);
+
+		val = HIWORD_UPDATE(0, RK3588_COMPRESS_MODE_MASK | RK3588_COLOR_FORMAT_MASK);
+		if (is_hdmi0)
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+		else
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+
+		return;
+	}
+
+	val = HIWORD_UPDATE(RK3588_HDMI21_MASK, RK3588_HDMI21_MASK);
+	if (is_hdmi0)
+		regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON4, val);
+	else
+		regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON7, val);
+
+	if (hdmi->link_cfg.dsc_mode) {
+		val = HIWORD_UPDATE(RK3588_COMPRESS_MODE_MASK | RK3588_COMPRESSED_DATA,
+				    RK3588_COMPRESS_MODE_MASK | RK3588_COLOR_FORMAT_MASK);
+		if (is_hdmi0)
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+		else
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+	} else {
+		val = HIWORD_UPDATE(0, RK3588_COMPRESS_MODE_MASK | RK3588_COLOR_FORMAT_MASK);
+		if (is_hdmi0)
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+		else
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+	}
+}
+
+static void rk3588_set_color_format(struct rockchip_hdmi *hdmi, u64 bus_format,
+				    u32 depth)
+{
+	u32 val = 0;
+
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+		val = HIWORD_UPDATE(0, RK3588_COLOR_FORMAT_MASK);
+		break;
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		val = HIWORD_UPDATE(RK3588_YUV420, RK3588_COLOR_FORMAT_MASK);
+		break;
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+		val = HIWORD_UPDATE(RK3588_YUV444, RK3588_COLOR_FORMAT_MASK);
+		break;
+	default:
+		dev_err(hdmi->dev, "can't set correct color format\n");
+		return;
+	}
+
+	if (hdmi->link_cfg.dsc_mode)
+		val = HIWORD_UPDATE(RK3588_COMPRESSED_DATA, RK3588_COLOR_FORMAT_MASK);
+
+	if (depth == 8)
+		val |= HIWORD_UPDATE(RK3588_8BPC, RK3588_COLOR_DEPTH_MASK);
+	else
+		val |= HIWORD_UPDATE(RK3588_10BPC, RK3588_COLOR_DEPTH_MASK);
+
+	if (!hdmi->id)
+		regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+	else
+		regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+}
+
+static void rk3588_set_grf_cfg(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	int color_depth;
+
+	rk3588_set_link_mode(hdmi);
+	color_depth = hdmi_bus_fmt_color_depth(hdmi->bus_format);
+	rk3588_set_color_format(hdmi, hdmi->bus_format, color_depth);
+}
+
+static void
+dw_hdmi_rockchip_select_output(struct drm_connector_state *conn_state,
+			       struct drm_crtc_state *crtc_state,
+			       struct rockchip_hdmi *hdmi,
+			       unsigned int *color_format,
+			       unsigned int *output_mode,
+			       unsigned long *bus_format,
+			       unsigned int *bus_width,
+			       unsigned long *enc_out_encoding,
+			       unsigned int *eotf)
+{
+	struct drm_display_info *info = &conn_state->connector->display_info;
+	struct drm_display_mode mode;
+	struct hdr_output_metadata *hdr_metadata;
+	u32 vic;
+	unsigned long tmdsclock, pixclock;
+	unsigned int color_depth;
+	bool support_dc = false;
+	bool sink_is_hdmi = true;
+	u32 max_tmds_clock = info->max_tmds_clock;
+	int output_eotf;
+
+	drm_mode_copy(&mode, &crtc_state->mode);
+	pixclock = mode.crtc_clock;
+	// if (hdmi->plat_data->split_mode) {
+	// 	drm_mode_convert_to_origin_mode(&mode);
+	// 	pixclock /= 2;
+	// }
+
+	vic = drm_match_cea_mode(&mode);
+
+	if (!hdmi->is_hdmi_qp)
+		sink_is_hdmi = dw_hdmi_get_output_whether_hdmi(hdmi->hdmi);
+
+	*color_format = RK_IF_FORMAT_RGB;
+
+	switch (hdmi->hdmi_output) {
+	case RK_IF_FORMAT_YCBCR_HQ:
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
+			*color_format = RK_IF_FORMAT_YCBCR444;
+		else if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+			*color_format = RK_IF_FORMAT_YCBCR422;
+		else if (conn_state->connector->ycbcr_420_allowed &&
+			 drm_mode_is_420(info, &mode) &&
+			 (pixclock >= 594000 && !hdmi->is_hdmi_qp))
+			*color_format = RK_IF_FORMAT_YCBCR420;
+		break;
+	case RK_IF_FORMAT_YCBCR_LQ:
+		if (conn_state->connector->ycbcr_420_allowed &&
+		    drm_mode_is_420(info, &mode) && pixclock >= 594000)
+			*color_format = RK_IF_FORMAT_YCBCR420;
+		else if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+			*color_format = RK_IF_FORMAT_YCBCR422;
+		else if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
+			*color_format = RK_IF_FORMAT_YCBCR444;
+		break;
+	case RK_IF_FORMAT_YCBCR420:
+		if (conn_state->connector->ycbcr_420_allowed &&
+		    drm_mode_is_420(info, &mode) && pixclock >= 594000)
+			*color_format = RK_IF_FORMAT_YCBCR420;
+		break;
+	case RK_IF_FORMAT_YCBCR422:
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+			*color_format = RK_IF_FORMAT_YCBCR422;
+		break;
+	case RK_IF_FORMAT_YCBCR444:
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
+			*color_format = RK_IF_FORMAT_YCBCR444;
+		break;
+	case RK_IF_FORMAT_RGB:
+	default:
+		break;
+	}
+
+	if (*color_format == RK_IF_FORMAT_RGB &&
+	    info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30)
+		support_dc = true;
+	if (*color_format == RK_IF_FORMAT_YCBCR444 &&
+	    info->edid_hdmi_rgb444_dc_modes &
+	    (DRM_EDID_HDMI_DC_Y444 | DRM_EDID_HDMI_DC_30))
+		support_dc = true;
+	if (*color_format == RK_IF_FORMAT_YCBCR422)
+		support_dc = true;
+	if (*color_format == RK_IF_FORMAT_YCBCR420 &&
+	    info->hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_30)
+		support_dc = true;
+
+	if (hdmi->colordepth > 8 && support_dc)
+		color_depth = 10;
+	else
+		color_depth = 8;
+
+	if (!sink_is_hdmi) {
+		*color_format = RK_IF_FORMAT_RGB;
+		color_depth = 8;
+	}
+
+	*eotf = HDMI_EOTF_TRADITIONAL_GAMMA_SDR;
+	if (conn_state->hdr_output_metadata) {
+		hdr_metadata = (struct hdr_output_metadata *)
+			conn_state->hdr_output_metadata->data;
+		output_eotf = hdr_metadata->hdmi_metadata_type1.eotf;
+		if (output_eotf > HDMI_EOTF_TRADITIONAL_GAMMA_SDR &&
+		    output_eotf <= HDMI_EOTF_BT_2100_HLG)
+			*eotf = output_eotf;
+	}
+
+	hdmi->colorimetry = conn_state->colorspace;
+
+	if ((*eotf > HDMI_EOTF_TRADITIONAL_GAMMA_SDR &&
+	     conn_state->connector->hdr_sink_metadata.hdmi_type1.eotf &
+	     BIT(*eotf)) || ((hdmi->colorimetry >= DRM_MODE_COLORIMETRY_BT2020_CYCC) &&
+	     (hdmi->colorimetry <= DRM_MODE_COLORIMETRY_BT2020_YCC)))
+		*enc_out_encoding = V4L2_YCBCR_ENC_BT2020;
+	else if ((vic == 6) || (vic == 7) || (vic == 21) || (vic == 22) ||
+		 (vic == 2) || (vic == 3) || (vic == 17) || (vic == 18))
+		*enc_out_encoding = V4L2_YCBCR_ENC_601;
+	else
+		*enc_out_encoding = V4L2_YCBCR_ENC_709;
+
+	if (*enc_out_encoding == V4L2_YCBCR_ENC_BT2020) {
+		/* BT2020 require color depth at lest 10bit */
+		color_depth = 10;
+		/* We prefer use YCbCr422 to send 10bit */
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+			*color_format = RK_IF_FORMAT_YCBCR422;
+		if (hdmi->is_hdmi_qp) {
+			if (info->color_formats & DRM_COLOR_FORMAT_YCBCR420) {
+				if (mode.clock >= 340000)
+					*color_format = RK_IF_FORMAT_YCBCR420;
+				else
+					*color_format = RK_IF_FORMAT_RGB;
+			} else {
+				*color_format = RK_IF_FORMAT_RGB;
+			}
+		}
+	}
+
+	if (mode.flags & DRM_MODE_FLAG_DBLCLK)
+		pixclock *= 2;
+	if ((mode.flags & DRM_MODE_FLAG_3D_MASK) ==
+		DRM_MODE_FLAG_3D_FRAME_PACKING)
+		pixclock *= 2;
+
+	if (*color_format == RK_IF_FORMAT_YCBCR422 || color_depth == 8)
+		tmdsclock = pixclock;
+	else
+		tmdsclock = pixclock * (color_depth) / 8;
+
+	if (*color_format == RK_IF_FORMAT_YCBCR420)
+		tmdsclock /= 2;
+
+	/* XXX: max_tmds_clock of some sink is 0, we think it is 340MHz. */
+	if (!max_tmds_clock)
+		max_tmds_clock = 340000;
+
+	max_tmds_clock = min(max_tmds_clock, hdmi->max_tmdsclk);
+
+	if ((tmdsclock > max_tmds_clock) && !hdmi->is_hdmi_qp) {
+		if (max_tmds_clock >= 594000) {
+			color_depth = 8;
+		} else if (max_tmds_clock > 340000) {
+			if (drm_mode_is_420(info, &mode) || tmdsclock >= 594000)
+				*color_format = RK_IF_FORMAT_YCBCR420;
+		} else {
+			color_depth = 8;
+			if (drm_mode_is_420(info, &mode) || tmdsclock >= 594000)
+				*color_format = RK_IF_FORMAT_YCBCR420;
+		}
+	}
+
+	if (hdmi->is_hdmi_qp) {
+		if (mode.clock >= 340000) {
+			if (drm_mode_is_420(info, &mode))
+				*color_format = RK_IF_FORMAT_YCBCR420;
+			else
+				*color_format = RK_IF_FORMAT_RGB;
+		} else if (tmdsclock > max_tmds_clock) {
+			color_depth = 8;
+			if (drm_mode_is_420(info, &mode))
+				*color_format = RK_IF_FORMAT_YCBCR420;
+		}
+	}
+
+	if (*color_format == RK_IF_FORMAT_YCBCR420) {
+		*output_mode = ROCKCHIP_OUT_MODE_YUV420;
+		if (color_depth > 8)
+			*bus_format = MEDIA_BUS_FMT_UYYVYY10_0_5X30;
+		else
+			*bus_format = MEDIA_BUS_FMT_UYYVYY8_0_5X24;
+		*bus_width = color_depth / 2;
+	} else {
+		*output_mode = ROCKCHIP_OUT_MODE_AAAA;
+		if (color_depth > 8) {
+			if (*color_format != RK_IF_FORMAT_RGB &&
+			    !hdmi->unsupported_yuv_input)
+				*bus_format = MEDIA_BUS_FMT_YUV10_1X30;
+			else
+				*bus_format = MEDIA_BUS_FMT_RGB101010_1X30;
+		} else {
+			if (*color_format != RK_IF_FORMAT_RGB &&
+			    !hdmi->unsupported_yuv_input)
+				*bus_format = MEDIA_BUS_FMT_YUV8_1X24;
+			else
+				*bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+		}
+		if (*color_format == RK_IF_FORMAT_YCBCR422)
+			*bus_width = 8;
+		else
+			*bus_width = color_depth;
+	}
+
+	hdmi->bus_format = *bus_format;
+
+	if (*color_format == RK_IF_FORMAT_YCBCR422) {
+		if (color_depth == 12)
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY12_1X24;
+		else if (color_depth == 10)
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY10_1X20;
+		else
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY8_1X16;
+	} else {
+		hdmi->output_bus_format = *bus_format;
+	}
+}
+
+static bool
+dw_hdmi_rockchip_check_color(struct drm_connector_state *conn_state,
+			     struct rockchip_hdmi *hdmi)
+{
+	struct drm_crtc_state *crtc_state = conn_state->crtc->state;
+	unsigned int colorformat;
+	unsigned long bus_format;
+	unsigned long output_bus_format = hdmi->output_bus_format;
+	unsigned long enc_out_encoding = hdmi->enc_out_encoding;
+	unsigned int eotf, bus_width;
+	unsigned int output_mode;
+
+	dw_hdmi_rockchip_select_output(conn_state, crtc_state, hdmi,
+				       &colorformat,
+				       &output_mode, &bus_format, &bus_width,
+				       &hdmi->enc_out_encoding, &eotf);
+
+	if (output_bus_format != hdmi->output_bus_format ||
+	    enc_out_encoding != hdmi->enc_out_encoding)
+		return true;
+	else
+		return false;
+}
+
+static int
+dw_hdmi_rockchip_encoder_atomic_check(struct drm_encoder *encoder,
+				      struct drm_crtc_state *crtc_state,
+				      struct drm_connector_state *conn_state)
+{
+	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+	struct rockchip_hdmi *hdmi = to_rockchip_hdmi(encoder);
+	unsigned int colorformat, bus_width, tmdsclk;
+	struct drm_display_mode mode;
+	unsigned int output_mode;
+	unsigned long bus_format;
+	int color_depth;
+	bool secondary = false;
+
+	/*
+	 * There are two hdmi but only one encoder in split mode,
+	 * so we need to check twice.
+	 */
+secondary:
+	drm_mode_copy(&mode, &crtc_state->mode);
+
+	hdmi->vp_id = 0;
+	// hdmi->vp_id = s->vp_id;
+	// if (hdmi->plat_data->split_mode)
+	// 	drm_mode_convert_to_origin_mode(&mode);
+
+	int eotf;
+	dw_hdmi_rockchip_select_output(conn_state, crtc_state, hdmi,
+				       &colorformat,
+				       &output_mode, &bus_format, &bus_width,
+				       // &hdmi->enc_out_encoding, &s->eotf);
+				       &hdmi->enc_out_encoding, &eotf);
+
+	s->bus_format = bus_format;
+	if (hdmi->is_hdmi_qp) {
+		color_depth = hdmi_bus_fmt_color_depth(bus_format);
+		tmdsclk = hdmi_get_tmdsclock(hdmi, crtc_state->mode.clock);
+		if (hdmi_bus_fmt_is_yuv420(hdmi->output_bus_format))
+			tmdsclk /= 2;
+		hdmi_select_link_config(hdmi, crtc_state, tmdsclk);
+
+		if (hdmi->link_cfg.frl_mode) {
+			gpiod_set_value(hdmi->enable_gpio, 0);
+			/* in the current version, support max 40G frl */
+			if (hdmi->link_cfg.rate_per_lane >= 10) {
+				hdmi->link_cfg.frl_lanes = 4;
+				hdmi->link_cfg.rate_per_lane = 10;
+			}
+			bus_width = hdmi->link_cfg.frl_lanes *
+				hdmi->link_cfg.rate_per_lane * 1000000;
+			/* 10 bit color depth and frl mode */
+			if (color_depth == 10)
+				bus_width |=
+					COLOR_DEPTH_10BIT | HDMI_FRL_MODE;
+			else
+				bus_width |= HDMI_FRL_MODE;
+		} else {
+			gpiod_set_value(hdmi->enable_gpio, 1);
+			bus_width = hdmi_get_tmdsclock(hdmi, mode.clock * 10);
+			if (hdmi_bus_fmt_is_yuv420(hdmi->output_bus_format))
+				bus_width /= 2;
+
+			if (color_depth == 10)
+				bus_width |= COLOR_DEPTH_10BIT;
+		}
+	}
+
+	hdmi->phy_bus_width = bus_width;
+
+	if (hdmi->phy)
+		phy_set_bus_width(hdmi->phy, bus_width);
+
+	s->output_type = DRM_MODE_CONNECTOR_HDMIA;
+	// s->tv_state = &conn_state->tv;
+	//
+	// if (hdmi->plat_data->split_mode) {
+	// 	s->output_flags |= ROCKCHIP_OUTPUT_DUAL_CHANNEL_LEFT_RIGHT_MODE;
+	// 	if (hdmi->plat_data->right && hdmi->id)
+	// 		s->output_flags |= ROCKCHIP_OUTPUT_DATA_SWAP;
+	// 	s->output_if |= VOP_OUTPUT_IF_HDMI0 | VOP_OUTPUT_IF_HDMI1;
+	// } else {
+	// 	if (!hdmi->id)
+	// 		s->output_if |= VOP_OUTPUT_IF_HDMI0;
+	// 	else
+	// 		s->output_if |= VOP_OUTPUT_IF_HDMI1;
+	// }
+
+	s->output_mode = output_mode;
+	hdmi->bus_format = s->bus_format;
+
+	if (hdmi->enc_out_encoding == V4L2_YCBCR_ENC_BT2020)
+		s->color_space = V4L2_COLORSPACE_BT2020;
+	else if (colorformat == RK_IF_FORMAT_RGB)
+		s->color_space = V4L2_COLORSPACE_DEFAULT;
+	else if (hdmi->enc_out_encoding == V4L2_YCBCR_ENC_709)
+		s->color_space = V4L2_COLORSPACE_REC709;
+	else
+		s->color_space = V4L2_COLORSPACE_SMPTE170M;
+
+	if (hdmi->plat_data->split_mode && !secondary) {
+		hdmi = rockchip_hdmi_find_by_id(hdmi->dev->driver, !hdmi->id);
+		secondary = true;
+		goto secondary;
+	}

 	return 0;
 }

+static unsigned long
+dw_hdmi_rockchip_get_input_bus_format(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->bus_format;
+}
+
+static unsigned long
+dw_hdmi_rockchip_get_output_bus_format(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->output_bus_format;
+}
+
+static unsigned long
+dw_hdmi_rockchip_get_enc_in_encoding(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->enc_out_encoding;
+}
+
+static unsigned long
+dw_hdmi_rockchip_get_enc_out_encoding(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->enc_out_encoding;
+}
+
+static unsigned long
+dw_hdmi_rockchip_get_quant_range(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->hdmi_quant_range;
+}
+
+static struct drm_property *
+dw_hdmi_rockchip_get_hdr_property(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->hdr_panel_metadata_property;
+}
+
+static struct drm_property_blob *
+dw_hdmi_rockchip_get_hdr_blob(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return hdmi->hdr_panel_blob_ptr;
+}
+
+static bool
+dw_hdmi_rockchip_get_color_changed(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	bool ret = false;
+
+	if (hdmi->color_changed)
+		ret = true;
+	hdmi->color_changed = 0;
+
+	return ret;
+}
+
+#if 0
+static int
+dw_hdmi_rockchip_get_edid_dsc_info(void *data, struct edid *edid)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	if (!edid)
+		return -EINVAL;
+
+	return rockchip_drm_parse_cea_ext(&hdmi->dsc_cap,
+					  &hdmi->max_frl_rate_per_lane,
+					  &hdmi->max_lanes, edid);
+}
+
+static int
+dw_hdmi_rockchip_get_next_hdr_data(void *data, struct edid *edid,
+				   struct drm_connector *connector)
+{
+	int ret;
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	struct next_hdr_sink_data *sink_data = &hdmi->next_hdr_data;
+	size_t size = sizeof(*sink_data);
+	struct drm_property *property = hdmi->next_hdr_sink_data_property;
+	struct drm_property_blob *blob = hdmi->hdr_panel_blob_ptr;
+
+	if (!edid)
+		return -EINVAL;
+
+	rockchip_drm_parse_next_hdr(sink_data, edid);
+
+	ret = drm_property_replace_global_blob(connector->dev, &blob, size, sink_data,
+					       &connector->base, property);
+
+	return ret;
+};
+#endif
+
+static
+struct dw_hdmi_link_config *dw_hdmi_rockchip_get_link_cfg(void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	return &hdmi->link_cfg;
+}
+
+#if 0
+static const struct drm_prop_enum_list color_depth_enum_list[] = {
+	{ 0, "Automatic" }, /* Prefer highest color depth */
+	{ 8, "24bit" },
+	{ 10, "30bit" },
+};
+
+static const struct drm_prop_enum_list drm_hdmi_output_enum_list[] = {
+	{ RK_IF_FORMAT_RGB, "rgb" },
+	{ RK_IF_FORMAT_YCBCR444, "ycbcr444" },
+	{ RK_IF_FORMAT_YCBCR422, "ycbcr422" },
+	{ RK_IF_FORMAT_YCBCR420, "ycbcr420" },
+	{ RK_IF_FORMAT_YCBCR_HQ, "ycbcr_high_subsampling" },
+	{ RK_IF_FORMAT_YCBCR_LQ, "ycbcr_low_subsampling" },
+	{ RK_IF_FORMAT_MAX, "invalid_output" },
+};
+
+static const struct drm_prop_enum_list quant_range_enum_list[] = {
+	{ HDMI_QUANTIZATION_RANGE_DEFAULT, "default" },
+	{ HDMI_QUANTIZATION_RANGE_LIMITED, "limit" },
+	{ HDMI_QUANTIZATION_RANGE_FULL, "full" },
+};
+
+static const struct drm_prop_enum_list output_hdmi_dvi_enum_list[] = {
+	{ 0, "auto" },
+	{ 1, "force_hdmi" },
+	{ 2, "force_dvi" },
+};
+
+static const struct drm_prop_enum_list output_type_cap_list[] = {
+	{ 0, "DVI" },
+	{ 1, "HDMI" },
+};
+#endif
+
+static void
+dw_hdmi_rockchip_attach_properties(struct drm_connector *connector,
+				   unsigned int color, int version,
+				   void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	struct drm_property *prop;
+	// struct rockchip_drm_private *private = connector->dev->dev_private;
+
+	switch (color) {
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+		hdmi->hdmi_output = RK_IF_FORMAT_RGB;
+		hdmi->colordepth = 10;
+		break;
+	case MEDIA_BUS_FMT_YUV8_1X24:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR444;
+		hdmi->colordepth = 8;
+		break;
+	case MEDIA_BUS_FMT_YUV10_1X30:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR444;
+		hdmi->colordepth = 10;
+		break;
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR422;
+		hdmi->colordepth = 10;
+		break;
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR422;
+		hdmi->colordepth = 8;
+		break;
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR420;
+		hdmi->colordepth = 8;
+		break;
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		hdmi->hdmi_output = RK_IF_FORMAT_YCBCR420;
+		hdmi->colordepth = 10;
+		break;
+	default:
+		hdmi->hdmi_output = RK_IF_FORMAT_RGB;
+		hdmi->colordepth = 8;
+	}
+
+	hdmi->bus_format = color;
+
+	if (hdmi->hdmi_output == RK_IF_FORMAT_YCBCR422) {
+		if (hdmi->colordepth == 12)
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY12_1X24;
+		else if (hdmi->colordepth == 10)
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY10_1X20;
+		else
+			hdmi->output_bus_format = MEDIA_BUS_FMT_UYVY8_1X16;
+	} else {
+		hdmi->output_bus_format = hdmi->bus_format;
+	}
+
+#if 0
+	/* RK3368 does not support deep color mode */
+	if (!hdmi->color_depth_property && !hdmi->unsupported_deep_color) {
+		prop = drm_property_create_enum(connector->dev, 0,
+						RK_IF_PROP_COLOR_DEPTH,
+						color_depth_enum_list,
+						ARRAY_SIZE(color_depth_enum_list));
+		if (prop) {
+			hdmi->color_depth_property = prop;
+			drm_object_attach_property(&connector->base, prop, 0);
+		}
+	}
+
+	prop = drm_property_create_enum(connector->dev, 0, RK_IF_PROP_COLOR_FORMAT,
+					drm_hdmi_output_enum_list,
+					ARRAY_SIZE(drm_hdmi_output_enum_list));
+	if (prop) {
+		hdmi->hdmi_output_property = prop;
+		drm_object_attach_property(&connector->base, prop, 0);
+	}
+
+	prop = drm_property_create_range(connector->dev, 0,
+					 RK_IF_PROP_COLOR_DEPTH_CAPS,
+					 0, 0xff);
+	if (prop) {
+		hdmi->colordepth_capacity = prop;
+		drm_object_attach_property(&connector->base, prop, 0);
+	}
+
+	prop = drm_property_create_range(connector->dev, 0,
+					 RK_IF_PROP_COLOR_FORMAT_CAPS,
+					 0, 0xf);
+	if (prop) {
+		hdmi->outputmode_capacity = prop;
+		drm_object_attach_property(&connector->base, prop, 0);
+	}
+
+	prop = drm_property_create(connector->dev,
+				   DRM_MODE_PROP_BLOB |
+				   DRM_MODE_PROP_IMMUTABLE,
+				   "HDR_PANEL_METADATA", 0);
+	if (prop) {
+		hdmi->hdr_panel_metadata_property = prop;
+		drm_object_attach_property(&connector->base, prop, 0);
+	}
+
+	prop = drm_property_create(connector->dev,
+				   DRM_MODE_PROP_BLOB |
+				   DRM_MODE_PROP_IMMUTABLE,
+				   "NEXT_HDR_SINK_DATA", 0);
+	if (prop) {
+		hdmi->next_hdr_sink_data_property = prop;
+		drm_object_attach_property(&connector->base, prop, 0);
+	}
+
+	prop = drm_property_create_bool(connector->dev, DRM_MODE_PROP_IMMUTABLE,
+					"USER_SPLIT_MODE");
+	if (prop) {
+		hdmi->user_split_mode_prop = prop;
+		drm_object_attach_property(&connector->base, prop,
+					   hdmi->user_split_mode ? 1 : 0);
+	}
+
+	if (!hdmi->is_hdmi_qp) {
+		prop = drm_property_create_enum(connector->dev, 0,
+						"output_hdmi_dvi",
+						output_hdmi_dvi_enum_list,
+						ARRAY_SIZE(output_hdmi_dvi_enum_list));
+		if (prop) {
+			hdmi->output_hdmi_dvi = prop;
+			drm_object_attach_property(&connector->base, prop, 0);
+		}
+
+		prop = drm_property_create_enum(connector->dev, 0,
+						 "output_type_capacity",
+						 output_type_cap_list,
+						 ARRAY_SIZE(output_type_cap_list));
+		if (prop) {
+			hdmi->output_type_capacity = prop;
+			drm_object_attach_property(&connector->base, prop, 0);
+		}
+
+		prop = drm_property_create_enum(connector->dev, 0,
+						"hdmi_quant_range",
+						quant_range_enum_list,
+						ARRAY_SIZE(quant_range_enum_list));
+		if (prop) {
+			hdmi->quant_range = prop;
+			drm_object_attach_property(&connector->base, prop, 0);
+		}
+	}
+#endif
+
+	prop = connector->dev->mode_config.hdr_output_metadata_property;
+	if (version >= 0x211a || hdmi->is_hdmi_qp)
+		drm_object_attach_property(&connector->base, prop, 0);
+
+	if (!drm_mode_create_hdmi_colorspace_property(connector, 0))
+		drm_object_attach_property(&connector->base,
+					   connector->colorspace_property, 0);
+
+#if 0
+	// [CC:] if this is not needed, also drop connector_id_prop
+	if (!private->connector_id_prop)
+		private->connector_id_prop = drm_property_create_range(connector->dev,
+				DRM_MODE_PROP_ATOMIC | DRM_MODE_PROP_IMMUTABLE,
+				"CONNECTOR_ID", 0, 0xf);
+	if (private->connector_id_prop)
+		drm_object_attach_property(&connector->base, private->connector_id_prop, hdmi->id);
+#endif
+}
+
+static void
+dw_hdmi_rockchip_destroy_properties(struct drm_connector *connector,
+				    void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	if (hdmi->color_depth_property) {
+		drm_property_destroy(connector->dev,
+				     hdmi->color_depth_property);
+		hdmi->color_depth_property = NULL;
+	}
+
+	if (hdmi->hdmi_output_property) {
+		drm_property_destroy(connector->dev,
+				     hdmi->hdmi_output_property);
+		hdmi->hdmi_output_property = NULL;
+	}
+
+	if (hdmi->colordepth_capacity) {
+		drm_property_destroy(connector->dev,
+				     hdmi->colordepth_capacity);
+		hdmi->colordepth_capacity = NULL;
+	}
+
+	if (hdmi->outputmode_capacity) {
+		drm_property_destroy(connector->dev,
+				     hdmi->outputmode_capacity);
+		hdmi->outputmode_capacity = NULL;
+	}
+
+	if (hdmi->quant_range) {
+		drm_property_destroy(connector->dev,
+				     hdmi->quant_range);
+		hdmi->quant_range = NULL;
+	}
+
+	if (hdmi->hdr_panel_metadata_property) {
+		drm_property_destroy(connector->dev,
+				     hdmi->hdr_panel_metadata_property);
+		hdmi->hdr_panel_metadata_property = NULL;
+	}
+
+	if (hdmi->next_hdr_sink_data_property) {
+		drm_property_destroy(connector->dev,
+				     hdmi->next_hdr_sink_data_property);
+		hdmi->next_hdr_sink_data_property = NULL;
+	}
+
+	if (hdmi->output_hdmi_dvi) {
+		drm_property_destroy(connector->dev,
+				     hdmi->output_hdmi_dvi);
+		hdmi->output_hdmi_dvi = NULL;
+	}
+
+	if (hdmi->output_type_capacity) {
+		drm_property_destroy(connector->dev,
+				     hdmi->output_type_capacity);
+		hdmi->output_type_capacity = NULL;
+	}
+
+	if (hdmi->user_split_mode_prop) {
+		drm_property_destroy(connector->dev,
+				     hdmi->user_split_mode_prop);
+		hdmi->user_split_mode_prop = NULL;
+	}
+}
+
+static int
+dw_hdmi_rockchip_set_property(struct drm_connector *connector,
+			      struct drm_connector_state *state,
+			      struct drm_property *property,
+			      u64 val,
+			      void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	struct drm_mode_config *config = &connector->dev->mode_config;
+
+	if (property == hdmi->color_depth_property) {
+		hdmi->colordepth = val;
+		/* If hdmi is disconnected, state->crtc is null */
+		if (!state->crtc)
+			return 0;
+		if (dw_hdmi_rockchip_check_color(state, hdmi))
+			hdmi->color_changed++;
+		return 0;
+	} else if (property == hdmi->hdmi_output_property) {
+		hdmi->hdmi_output = val;
+		if (!state->crtc)
+			return 0;
+		if (dw_hdmi_rockchip_check_color(state, hdmi))
+			hdmi->color_changed++;
+		return 0;
+	} else if (property == hdmi->quant_range) {
+		u64 quant_range = hdmi->hdmi_quant_range;
+
+		hdmi->hdmi_quant_range = val;
+		if (quant_range != hdmi->hdmi_quant_range)
+			dw_hdmi_set_quant_range(hdmi->hdmi);
+		return 0;
+	} else if (property == config->hdr_output_metadata_property) {
+		return 0;
+	} else if (property == hdmi->output_hdmi_dvi) {
+		if (hdmi->force_output != val)
+			hdmi->color_changed++;
+		hdmi->force_output = val;
+		dw_hdmi_set_output_type(hdmi->hdmi, val);
+		return 0;
+	} else if (property == hdmi->colordepth_capacity) {
+		return 0;
+	} else if (property == hdmi->outputmode_capacity) {
+		return 0;
+	} else if (property == hdmi->output_type_capacity) {
+		return 0;
+	}
+
+	DRM_ERROR("Unknown property [PROP:%d:%s]\n",
+		  property->base.id, property->name);
+
+	return -EINVAL;
+}
+
+static int
+dw_hdmi_rockchip_get_property(struct drm_connector *connector,
+			      const struct drm_connector_state *state,
+			      struct drm_property *property,
+			      u64 *val,
+			      void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	struct drm_display_info *info = &connector->display_info;
+	struct drm_mode_config *config = &connector->dev->mode_config;
+
+	if (property == hdmi->color_depth_property) {
+		*val = hdmi->colordepth;
+		return 0;
+	} else if (property == hdmi->hdmi_output_property) {
+		*val = hdmi->hdmi_output;
+		return 0;
+	} else if (property == hdmi->colordepth_capacity) {
+		*val = BIT(RK_IF_DEPTH_8);
+		/* RK3368 only support 8bit */
+		if (hdmi->unsupported_deep_color)
+			return 0;
+		if (info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30)
+			*val |= BIT(RK_IF_DEPTH_10);
+		if (info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_36)
+			*val |= BIT(RK_IF_DEPTH_12);
+		if (info->edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_48)
+			*val |= BIT(RK_IF_DEPTH_16);
+		if (info->hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_30)
+			*val |= BIT(RK_IF_DEPTH_420_10);
+		if (info->hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_36)
+			*val |= BIT(RK_IF_DEPTH_420_12);
+		if (info->hdmi.y420_dc_modes & DRM_EDID_YCBCR420_DC_48)
+			*val |= BIT(RK_IF_DEPTH_420_16);
+		return 0;
+	} else if (property == hdmi->outputmode_capacity) {
+		*val = BIT(RK_IF_FORMAT_RGB);
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
+			*val |= BIT(RK_IF_FORMAT_YCBCR444);
+		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR422)
+			*val |= BIT(RK_IF_FORMAT_YCBCR422);
+		if (connector->ycbcr_420_allowed &&
+		    info->color_formats & DRM_COLOR_FORMAT_YCBCR420)
+			*val |= BIT(RK_IF_FORMAT_YCBCR420);
+		return 0;
+	} else if (property == hdmi->quant_range) {
+		*val = hdmi->hdmi_quant_range;
+		return 0;
+	} else if (property == config->hdr_output_metadata_property) {
+		*val = state->hdr_output_metadata ?
+			state->hdr_output_metadata->base.id : 0;
+		return 0;
+	} else if (property == hdmi->output_hdmi_dvi) {
+		*val = hdmi->force_output;
+		return 0;
+	} else if (property == hdmi->output_type_capacity) {
+		*val = dw_hdmi_get_output_type_cap(hdmi->hdmi);
+		return 0;
+	} else if (property == hdmi->user_split_mode_prop) {
+		*val = hdmi->user_split_mode;
+		return 0;
+	}
+
+	DRM_ERROR("Unknown property [PROP:%d:%s]\n",
+		  property->base.id, property->name);
+
+	return -EINVAL;
+}
+
+static const struct dw_hdmi_property_ops dw_hdmi_rockchip_property_ops = {
+	.attach_properties	= dw_hdmi_rockchip_attach_properties,
+	.destroy_properties	= dw_hdmi_rockchip_destroy_properties,
+	.set_property		= dw_hdmi_rockchip_set_property,
+	.get_property		= dw_hdmi_rockchip_get_property,
+};
+
 static const struct drm_encoder_helper_funcs dw_hdmi_rockchip_encoder_helper_funcs = {
 	.mode_fixup = dw_hdmi_rockchip_encoder_mode_fixup,
 	.mode_set   = dw_hdmi_rockchip_encoder_mode_set,
@@ -356,20 +2518,24 @@ static const struct drm_encoder_helper_funcs dw_hdmi_rockchip_encoder_helper_fun
 	.atomic_check = dw_hdmi_rockchip_encoder_atomic_check,
 };

-static int dw_hdmi_rockchip_genphy_init(struct dw_hdmi *dw_hdmi, void *data,
-					const struct drm_display_info *display,
-					const struct drm_display_mode *mode)
+static void dw_hdmi_rockchip_genphy_disable(struct dw_hdmi *dw_hdmi, void *data)
 {
 	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;

-	return phy_power_on(hdmi->phy);
+	while (hdmi->phy->power_count > 0)
+		phy_power_off(hdmi->phy);
 }

-static void dw_hdmi_rockchip_genphy_disable(struct dw_hdmi *dw_hdmi, void *data)
+static int dw_hdmi_rockchip_genphy_init(struct dw_hdmi *dw_hdmi, void *data,
+					const struct drm_display_info *display,
+					const struct drm_display_mode *mode)
 {
 	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;

-	phy_power_off(hdmi->phy);
+	dw_hdmi_rockchip_genphy_disable(dw_hdmi, data);
+	dw_hdmi_set_high_tmds_clock_ratio(dw_hdmi, display);
+
+	return phy_power_on(hdmi->phy);
 }

 static void dw_hdmi_rk3228_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
@@ -436,6 +2602,90 @@ static void dw_hdmi_rk3328_setup_hpd(struct dw_hdmi *dw_hdmi, void *data)
 			      RK3328_HDMI_HPD_IOE));
 }

+static void dw_hdmi_qp_rockchip_phy_disable(struct dw_hdmi_qp *dw_hdmi,
+					    void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	while (hdmi->phy->power_count > 0)
+		phy_power_off(hdmi->phy);
+}
+
+static int dw_hdmi_qp_rockchip_genphy_init(struct dw_hdmi_qp *dw_hdmi, void *data,
+					   struct drm_display_mode *mode)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	dw_hdmi_qp_rockchip_phy_disable(dw_hdmi, data);
+
+	return phy_power_on(hdmi->phy);
+}
+
+static enum drm_connector_status
+dw_hdmi_rk3588_read_hpd(struct dw_hdmi_qp *dw_hdmi, void *data)
+{
+	u32 val;
+	int ret;
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	regmap_read(hdmi->regmap, RK3588_GRF_SOC_STATUS1, &val);
+
+	if (!hdmi->id) {
+		if (val & RK3588_HDMI0_LEVEL_INT) {
+			hdmi->hpd_stat = true;
+			ret = connector_status_connected;
+		} else {
+			hdmi->hpd_stat = false;
+			ret = connector_status_disconnected;
+		}
+	} else {
+		if (val & RK3588_HDMI1_LEVEL_INT) {
+			hdmi->hpd_stat = true;
+			ret = connector_status_connected;
+		} else {
+			hdmi->hpd_stat = false;
+			ret = connector_status_disconnected;
+		}
+	}
+
+	return ret;
+}
+
+static void dw_hdmi_rk3588_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+	u32 val;
+
+	if (!hdmi->id) {
+		val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR,
+				    RK3588_HDMI0_HPD_INT_CLR) |
+		      HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK);
+	} else {
+		val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR,
+				    RK3588_HDMI1_HPD_INT_CLR) |
+		      HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK);
+	}
+
+	regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
+}
+
+static void dw_hdmi_rk3588_phy_set_mode(struct dw_hdmi_qp *dw_hdmi, void *data,
+					u32 mode_mask, bool enable)
+{
+	struct rockchip_hdmi *hdmi = (struct rockchip_hdmi *)data;
+
+	if (!hdmi->phy)
+		return;
+
+	/* set phy earc/frl mode */
+	if (enable)
+		hdmi->phy_bus_width |= mode_mask;
+	else
+		hdmi->phy_bus_width &= ~mode_mask;
+
+	phy_set_bus_width(hdmi->phy, hdmi->phy_bus_width);
+}
+
 static const struct dw_hdmi_phy_ops rk3228_hdmi_phy_ops = {
 	.init		= dw_hdmi_rockchip_genphy_init,
 	.disable	= dw_hdmi_rockchip_genphy_disable,
@@ -525,6 +2775,30 @@ static const struct dw_hdmi_plat_data rk3568_hdmi_drv_data = {
 	.use_drm_infoframe = true,
 };

+static const struct dw_hdmi_qp_phy_ops rk3588_hdmi_phy_ops = {
+	.init		= dw_hdmi_qp_rockchip_genphy_init,
+	.disable	= dw_hdmi_qp_rockchip_phy_disable,
+	.read_hpd	= dw_hdmi_rk3588_read_hpd,
+	.setup_hpd	= dw_hdmi_rk3588_setup_hpd,
+	.set_mode       = dw_hdmi_rk3588_phy_set_mode,
+};
+
+struct rockchip_hdmi_chip_data rk3588_hdmi_chip_data = {
+	.lcdsel_grf_reg = -1,
+	.ddc_en_reg = RK3588_GRF_VO1_CON3,
+	.split_mode = true,
+};
+
+static const struct dw_hdmi_plat_data rk3588_hdmi_drv_data = {
+	.phy_data = &rk3588_hdmi_chip_data,
+	.qp_phy_ops = &rk3588_hdmi_phy_ops,
+	.phy_name = "samsung_hdptx_phy",
+	.phy_force_vendor = true,
+	.ycbcr_420_allowed = true,
+	.is_hdmi_qp = true,
+	.use_drm_infoframe = true,
+};
+
 static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
 	{ .compatible = "rockchip,rk3228-dw-hdmi",
 	  .data = &rk3228_hdmi_drv_data
@@ -541,6 +2815,9 @@ static const struct of_device_id dw_hdmi_rockchip_dt_ids[] = {
 	{ .compatible = "rockchip,rk3568-dw-hdmi",
 	  .data = &rk3568_hdmi_drv_data
 	},
+	{ .compatible = "rockchip,rk3588-dw-hdmi",
+	  .data = &rk3588_hdmi_drv_data
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, dw_hdmi_rockchip_dt_ids);
@@ -550,44 +2827,103 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct dw_hdmi_plat_data *plat_data;
-	const struct of_device_id *match;
 	struct drm_device *drm = data;
 	struct drm_encoder *encoder;
 	struct rockchip_hdmi *hdmi;
+	struct rockchip_hdmi *secondary;
 	int ret;
+	u32 val;

 	if (!pdev->dev.of_node)
 		return -ENODEV;

-	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
+	hdmi = platform_get_drvdata(pdev);
 	if (!hdmi)
 		return -ENOMEM;

-	match = of_match_node(dw_hdmi_rockchip_dt_ids, pdev->dev.of_node);
-	plat_data = devm_kmemdup(&pdev->dev, match->data,
-					     sizeof(*plat_data), GFP_KERNEL);
-	if (!plat_data)
-		return -ENOMEM;
+	plat_data = hdmi->plat_data;
+	hdmi->drm_dev = drm;

-	hdmi->dev = &pdev->dev;
-	hdmi->plat_data = plat_data;
-	hdmi->chip_data = plat_data->phy_data;
 	plat_data->phy_data = hdmi;
-	plat_data->priv_data = hdmi;
-	encoder = &hdmi->encoder.encoder;
+	plat_data->get_input_bus_format =
+		dw_hdmi_rockchip_get_input_bus_format;
+	plat_data->get_output_bus_format =
+		dw_hdmi_rockchip_get_output_bus_format;
+	plat_data->get_enc_in_encoding =
+		dw_hdmi_rockchip_get_enc_in_encoding;
+	plat_data->get_enc_out_encoding =
+		dw_hdmi_rockchip_get_enc_out_encoding;
+	plat_data->get_quant_range =
+		dw_hdmi_rockchip_get_quant_range;
+	plat_data->get_hdr_property =
+		dw_hdmi_rockchip_get_hdr_property;
+	plat_data->get_hdr_blob =
+		dw_hdmi_rockchip_get_hdr_blob;
+	plat_data->get_color_changed =
+		dw_hdmi_rockchip_get_color_changed;
+	// plat_data->get_edid_dsc_info =
+	// 	dw_hdmi_rockchip_get_edid_dsc_info;
+	// plat_data->get_next_hdr_data =
+	// 	dw_hdmi_rockchip_get_next_hdr_data;
+	plat_data->get_link_cfg = dw_hdmi_rockchip_get_link_cfg;
+	plat_data->set_grf_cfg = rk3588_set_grf_cfg;
+	// plat_data->convert_to_split_mode = drm_mode_convert_to_split_mode;
+	// plat_data->convert_to_origin_mode = drm_mode_convert_to_origin_mode;
+
+	plat_data->property_ops = &dw_hdmi_rockchip_property_ops;
+
+	secondary = rockchip_hdmi_find_by_id(dev->driver, !hdmi->id);
+	/* If don't enable hdmi0 and hdmi1, we don't enable split mode */
+	if (hdmi->chip_data->split_mode && secondary) {

-	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
-	rockchip_drm_encoder_set_crtc_endpoint_id(&hdmi->encoder,
-						  dev->of_node, 0, 0);
+		/*
+		 * hdmi can only attach bridge and init encoder/connector in the
+		 * last bind hdmi in split mode, or hdmi->hdmi_qp will not be initialized
+		 * and plat_data->left/right will be null pointer. we must check if split
+		 * mode is on and determine the sequence of hdmi bind.
+		 */
+		if (device_property_read_bool(dev, "split-mode") ||
+		    device_property_read_bool(secondary->dev, "split-mode")) {
+			plat_data->split_mode = true;
+			secondary->plat_data->split_mode = true;
+			if (!secondary->plat_data->first_screen)
+				plat_data->first_screen = true;
+		}
+
+		if (device_property_read_bool(dev, "user-split-mode") ||
+		    device_property_read_bool(secondary->dev, "user-split-mode")) {
+			hdmi->user_split_mode = true;
+			secondary->user_split_mode = true;
+		}
+	}
+
+	if (!plat_data->first_screen) {
+		encoder = &hdmi->encoder.encoder;
+		encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node);
+		rockchip_drm_encoder_set_crtc_endpoint_id(&hdmi->encoder,
+							  dev->of_node, 0, 0);
+		/*
+		 * If we failed to find the CRTC(s) which this encoder is
+		 * supposed to be connected to, it's because the CRTC has
+		 * not been registered yet.  Defer probing, and hope that
+		 * the required CRTC is added later.
+		 */
+		if (encoder->possible_crtcs == 0)
+			return -EPROBE_DEFER;
+
+		drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
+		// [CC:] consider using drmm_simple_encoder_alloc()
+		drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
+	}
+
+	if (!plat_data->max_tmdsclk)
+		hdmi->max_tmdsclk = 594000;
+	else
+		hdmi->max_tmdsclk = plat_data->max_tmdsclk;

-	/*
-	 * If we failed to find the CRTC(s) which this encoder is
-	 * supposed to be connected to, it's because the CRTC has
-	 * not been registered yet.  Defer probing, and hope that
-	 * the required CRTC is added later.
-	 */
-	if (encoder->possible_crtcs == 0)
-		return -EPROBE_DEFER;
+
+	hdmi->unsupported_yuv_input = plat_data->unsupported_yuv_input;
+	hdmi->unsupported_deep_color = plat_data->unsupported_deep_color;

 	ret = rockchip_hdmi_parse_dt(hdmi);
 	if (ret) {
@@ -596,34 +2932,44 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 		return ret;
 	}

-	hdmi->phy = devm_phy_optional_get(dev, "hdmi");
-	if (IS_ERR(hdmi->phy)) {
-		ret = PTR_ERR(hdmi->phy);
-		if (ret != -EPROBE_DEFER)
-			DRM_DEV_ERROR(hdmi->dev, "failed to get phy\n");
+	ret = clk_prepare_enable(hdmi->aud_clk);
+	if (ret) {
+		dev_err(hdmi->dev, "Failed to enable HDMI aud_clk: %d\n", ret);
 		return ret;
 	}

-	ret = regulator_enable(hdmi->avdd_0v9);
+	ret = clk_prepare_enable(hdmi->hpd_clk);
 	if (ret) {
-		DRM_DEV_ERROR(hdmi->dev, "failed to enable avdd0v9: %d\n", ret);
-		goto err_avdd_0v9;
+		dev_err(hdmi->dev, "Failed to enable HDMI hpd_clk: %d\n", ret);
+		return ret;
 	}

-	ret = regulator_enable(hdmi->avdd_1v8);
+	ret = clk_prepare_enable(hdmi->hclk_vo1);
 	if (ret) {
-		DRM_DEV_ERROR(hdmi->dev, "failed to enable avdd1v8: %d\n", ret);
-		goto err_avdd_1v8;
+		dev_err(hdmi->dev, "Failed to enable HDMI hclk_vo1: %d\n", ret);
+		return ret;
 	}

-	ret = clk_prepare_enable(hdmi->ref_clk);
+	ret = clk_prepare_enable(hdmi->earc_clk);
 	if (ret) {
-		DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI reference clock: %d\n",
-			      ret);
-		goto err_clk;
+		dev_err(hdmi->dev, "Failed to enable HDMI earc_clk: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(hdmi->hdmitx_ref);
+	if (ret) {
+		dev_err(hdmi->dev, "Failed to enable HDMI hdmitx_ref: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(hdmi->pclk);
+	if (ret) {
+		dev_err(hdmi->dev, "Failed to enable HDMI pclk: %d\n", ret);
+		return ret;
 	}

-	if (hdmi->chip_data == &rk3568_chip_data) {
+	if (hdmi->chip_data->ddc_en_reg == RK3568_GRF_VO_CON1) {
 		regmap_write(hdmi->regmap, RK3568_GRF_VO_CON1,
 			     HIWORD_UPDATE(RK3568_HDMI_SDAIN_MSK |
 					   RK3568_HDMI_SCLIN_MSK,
@@ -631,12 +2977,131 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 					   RK3568_HDMI_SCLIN_MSK));
 	}

-	drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs);
-	drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS);
+	if (hdmi->is_hdmi_qp) {
+		if (!hdmi->id) {
+			val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) |
+			      HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) |
+			      HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) |
+			      HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+
+			val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK,
+					    RK3588_SET_HPD_PATH_MASK);
+			regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val);
+
+			val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL,
+					    RK3588_HDMI0_GRANT_SEL);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON9, val);
+		} else {
+			val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) |
+			      HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) |
+			      HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) |
+			      HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+
+			val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK,
+					    RK3588_SET_HPD_PATH_MASK);
+			regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val);
+
+			val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL,
+					    RK3588_HDMI1_GRANT_SEL);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON9, val);
+		}
+	}

-	platform_set_drvdata(pdev, hdmi);
+	ret = clk_prepare_enable(hdmi->hclk_vio);
+	if (ret) {
+		dev_err(hdmi->dev, "Failed to enable HDMI hclk_vio: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(hdmi->hclk_vop);
+	if (ret) {
+		dev_err(hdmi->dev, "Failed to enable HDMI hclk_vop: %d\n",
+			ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(hdmi->ref_clk);
+	if (ret) {
+		DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI reference clock: %d\n",
+			      ret);
+		goto err_clk;
+	}
+
+	ret = regulator_enable(hdmi->avdd_0v9);
+	if (ret) {
+		DRM_DEV_ERROR(hdmi->dev, "failed to enable avdd0v9: %d\n", ret);
+		goto err_avdd_0v9;
+	}
+
+	ret = regulator_enable(hdmi->avdd_1v8);
+	if (ret) {
+		DRM_DEV_ERROR(hdmi->dev, "failed to enable avdd1v8: %d\n", ret);
+		goto err_avdd_1v8;
+	}
+
+	if (!hdmi->id)
+		val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK);
+	else
+		val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, RK3588_HDMI1_HPD_INT_MSK);
+	regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val);
+
+	if (hdmi->is_hdmi_qp) {
+		hdmi->hpd_irq = platform_get_irq(pdev, 4);
+		if (hdmi->hpd_irq < 0)
+			return hdmi->hpd_irq;
+
+		ret = devm_request_threaded_irq(hdmi->dev, hdmi->hpd_irq,
+						rockchip_hdmi_hardirq,
+						rockchip_hdmi_irq,
+						IRQF_SHARED, "dw-hdmi-qp-hpd",
+						hdmi);
+		if (ret)
+			return ret;
+	}
+
+	hdmi->phy = devm_phy_optional_get(dev, "hdmi");
+	if (IS_ERR(hdmi->phy)) {
+		hdmi->phy = devm_phy_optional_get(dev, "hdmi_phy");
+		if (IS_ERR(hdmi->phy)) {
+			ret = PTR_ERR(hdmi->phy);
+			if (ret != -EPROBE_DEFER)
+				DRM_DEV_ERROR(hdmi->dev, "failed to get phy\n");
+			return ret;
+		}
+	}
+
+	if (hdmi->is_hdmi_qp) {
+		// [CC:] do proper error handling, e.g. clk_disable_unprepare
+		hdmi->hdmi_qp = dw_hdmi_qp_bind(pdev, &hdmi->encoder.encoder, plat_data);
+
+		if (IS_ERR(hdmi->hdmi_qp)) {
+			ret = PTR_ERR(hdmi->hdmi_qp);
+			drm_encoder_cleanup(&hdmi->encoder.encoder);
+		}
+
+		// if (plat_data->connector) {
+		// 	hdmi->sub_dev.connector = plat_data->connector;
+		// 	hdmi->sub_dev.of_node = dev->of_node;
+		// 	rockchip_drm_register_sub_dev(&hdmi->sub_dev);
+		// }
+
+		if (plat_data->split_mode && secondary) {
+			if (device_property_read_bool(dev, "split-mode")) {
+				plat_data->right = secondary->hdmi_qp;
+				secondary->plat_data->left = hdmi->hdmi_qp;
+			} else {
+				plat_data->left = secondary->hdmi_qp;
+				secondary->plat_data->right = hdmi->hdmi_qp;
+			}
+		}
+
+		return ret;
+	}

-	hdmi->hdmi = dw_hdmi_bind(pdev, encoder, plat_data);
+	hdmi->hdmi = dw_hdmi_bind(pdev, &hdmi->encoder.encoder, plat_data);

 	/*
 	 * If dw_hdmi_bind() fails we'll never call dw_hdmi_unbind(),
@@ -647,11 +3112,24 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 		goto err_bind;
 	}

+	// if (plat_data->connector) {
+	// 	hdmi->sub_dev.connector = plat_data->connector;
+	// 	hdmi->sub_dev.of_node = dev->of_node;
+	// 	rockchip_drm_register_sub_dev(&hdmi->sub_dev);
+	// }
+
 	return 0;

 err_bind:
-	drm_encoder_cleanup(encoder);
+	drm_encoder_cleanup(&hdmi->encoder.encoder);
+	clk_disable_unprepare(hdmi->aud_clk);
 	clk_disable_unprepare(hdmi->ref_clk);
+	clk_disable_unprepare(hdmi->hclk_vop);
+	clk_disable_unprepare(hdmi->hpd_clk);
+	clk_disable_unprepare(hdmi->hclk_vo1);
+	clk_disable_unprepare(hdmi->earc_clk);
+	clk_disable_unprepare(hdmi->hdmitx_ref);
+	clk_disable_unprepare(hdmi->pclk);
 err_clk:
 	regulator_disable(hdmi->avdd_1v8);
 err_avdd_1v8:
@@ -665,9 +3143,29 @@ static void dw_hdmi_rockchip_unbind(struct device *dev, struct device *master,
 {
 	struct rockchip_hdmi *hdmi = dev_get_drvdata(dev);

-	dw_hdmi_unbind(hdmi->hdmi);
+	if (hdmi->is_hdmi_qp) {
+		cancel_delayed_work(&hdmi->work);
+		flush_workqueue(hdmi->workqueue);
+	}
+
+	// if (hdmi->sub_dev.connector)
+	// 	rockchip_drm_unregister_sub_dev(&hdmi->sub_dev);
+	//
+	if (hdmi->is_hdmi_qp)
+		dw_hdmi_qp_unbind(hdmi->hdmi_qp);
+	else
+		dw_hdmi_unbind(hdmi->hdmi);
+
 	drm_encoder_cleanup(&hdmi->encoder.encoder);
+
+	clk_disable_unprepare(hdmi->aud_clk);
 	clk_disable_unprepare(hdmi->ref_clk);
+	clk_disable_unprepare(hdmi->hclk_vop);
+	clk_disable_unprepare(hdmi->hpd_clk);
+	clk_disable_unprepare(hdmi->hclk_vo1);
+	clk_disable_unprepare(hdmi->earc_clk);
+	clk_disable_unprepare(hdmi->hdmitx_ref);
+	clk_disable_unprepare(hdmi->pclk);

 	regulator_disable(hdmi->avdd_1v8);
 	regulator_disable(hdmi->avdd_0v9);
@@ -680,30 +3178,142 @@ static const struct component_ops dw_hdmi_rockchip_ops = {

 static int dw_hdmi_rockchip_probe(struct platform_device *pdev)
 {
+	struct rockchip_hdmi *hdmi;
+	const struct of_device_id *match;
+	struct dw_hdmi_plat_data *plat_data;
+	int id;
+
+	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return -ENOMEM;
+
+	id = of_alias_get_id(pdev->dev.of_node, "hdmi");
+	if (id < 0)
+		id = 0;
+
+	hdmi->id = id;
+	hdmi->dev = &pdev->dev;
+
+	match = of_match_node(dw_hdmi_rockchip_dt_ids, pdev->dev.of_node);
+	plat_data = devm_kmemdup(&pdev->dev, match->data,
+				 sizeof(*plat_data), GFP_KERNEL);
+	if (!plat_data)
+		return -ENOMEM;
+
+	plat_data->id = hdmi->id;
+	hdmi->plat_data = plat_data;
+	hdmi->chip_data = plat_data->phy_data;
+	hdmi->is_hdmi_qp = plat_data->is_hdmi_qp;
+
+	if (hdmi->is_hdmi_qp) {
+		hdmi->workqueue = create_workqueue("hpd_queue");
+		INIT_DELAYED_WORK(&hdmi->work, repo_hpd_event);
+	}
+
+	platform_set_drvdata(pdev, hdmi);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
 	return component_add(&pdev->dev, &dw_hdmi_rockchip_ops);
 }

+static void dw_hdmi_rockchip_shutdown(struct platform_device *pdev)
+{
+	struct rockchip_hdmi *hdmi = dev_get_drvdata(&pdev->dev);
+
+	if (!hdmi)
+		return;
+
+	if (hdmi->is_hdmi_qp) {
+		cancel_delayed_work(&hdmi->work);
+		flush_workqueue(hdmi->workqueue);
+		dw_hdmi_qp_suspend(hdmi->dev, hdmi->hdmi_qp);
+	} else {
+		dw_hdmi_suspend(hdmi->hdmi);
+	}
+	pm_runtime_put_sync(&pdev->dev);
+}
+
 static void dw_hdmi_rockchip_remove(struct platform_device *pdev)
 {
+	struct rockchip_hdmi *hdmi = dev_get_drvdata(&pdev->dev);
+
 	component_del(&pdev->dev, &dw_hdmi_rockchip_ops);
+	pm_runtime_disable(&pdev->dev);
+
+	if (hdmi->is_hdmi_qp)
+		destroy_workqueue(hdmi->workqueue);
+}
+
+static int __maybe_unused dw_hdmi_rockchip_suspend(struct device *dev)
+{
+	struct rockchip_hdmi *hdmi = dev_get_drvdata(dev);
+
+	if (hdmi->is_hdmi_qp)
+		dw_hdmi_qp_suspend(dev, hdmi->hdmi_qp);
+	else
+		dw_hdmi_suspend(hdmi->hdmi);
+
+	pm_runtime_put_sync(dev);
+
+	return 0;
 }

 static int __maybe_unused dw_hdmi_rockchip_resume(struct device *dev)
 {
 	struct rockchip_hdmi *hdmi = dev_get_drvdata(dev);
+	u32 val;

-	dw_hdmi_resume(hdmi->hdmi);
+	if (hdmi->is_hdmi_qp) {
+		if (!hdmi->id) {
+			val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) |
+			      HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) |
+			      HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) |
+			      HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON3, val);
+
+			val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK,
+					    RK3588_SET_HPD_PATH_MASK);
+			regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val);
+
+			val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL,
+					    RK3588_HDMI0_GRANT_SEL);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON9, val);
+		} else {
+			val = HIWORD_UPDATE(RK3588_SCLIN_MASK, RK3588_SCLIN_MASK) |
+			      HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) |
+			      HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) |
+			      HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON6, val);
+
+			val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK,
+					    RK3588_SET_HPD_PATH_MASK);
+			regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val);
+
+			val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL,
+					    RK3588_HDMI1_GRANT_SEL);
+			regmap_write(hdmi->vo1_regmap, RK3588_GRF_VO1_CON9, val);
+		}
+
+		dw_hdmi_qp_resume(dev, hdmi->hdmi_qp);
+		drm_helper_hpd_irq_event(hdmi->drm_dev);
+	} else {
+		dw_hdmi_resume(hdmi->hdmi);
+	}
+	pm_runtime_get_sync(dev);

 	return 0;
 }

 static const struct dev_pm_ops dw_hdmi_rockchip_pm = {
-	SET_SYSTEM_SLEEP_PM_OPS(NULL, dw_hdmi_rockchip_resume)
+	SET_SYSTEM_SLEEP_PM_OPS(dw_hdmi_rockchip_suspend,
+				dw_hdmi_rockchip_resume)
 };

 struct platform_driver dw_hdmi_rockchip_pltfm_driver = {
 	.probe  = dw_hdmi_rockchip_probe,
 	.remove_new = dw_hdmi_rockchip_remove,
+	.shutdown = dw_hdmi_rockchip_shutdown,
 	.driver = {
 		.name = "dwhdmi-rockchip",
 		.pm = &dw_hdmi_rockchip_pm,
diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h
index 6a46baa0737c..ac4e418c1c4e 100644
--- a/include/drm/bridge/dw_hdmi.h
+++ b/include/drm/bridge/dw_hdmi.h
@@ -6,12 +6,14 @@
 #ifndef __DW_HDMI__
 #define __DW_HDMI__

+#include <drm/drm_property.h>
 #include <sound/hdmi-codec.h>

 struct drm_display_info;
 struct drm_display_mode;
 struct drm_encoder;
 struct dw_hdmi;
+struct dw_hdmi_qp;
 struct platform_device;

 /**
@@ -92,6 +94,13 @@ enum dw_hdmi_phy_type {
 	DW_HDMI_PHY_VENDOR_PHY = 0xfe,
 };

+struct dw_hdmi_audio_tmds_n {
+	unsigned long tmds;
+	unsigned int n_32k;
+	unsigned int n_44k1;
+	unsigned int n_48k;
+};
+
 struct dw_hdmi_mpll_config {
 	unsigned long mpixelclock;
 	struct {
@@ -112,6 +121,15 @@ struct dw_hdmi_phy_config {
 	u16 vlev_ctr;   /* voltage level control */
 };

+struct dw_hdmi_link_config {
+	bool dsc_mode;
+	bool frl_mode;
+	int frl_lanes;
+	int rate_per_lane;
+	int hcactive;
+	u8 pps_payload[128];
+};
+
 struct dw_hdmi_phy_ops {
 	int (*init)(struct dw_hdmi *hdmi, void *data,
 		    const struct drm_display_info *display,
@@ -123,14 +141,52 @@ struct dw_hdmi_phy_ops {
 	void (*setup_hpd)(struct dw_hdmi *hdmi, void *data);
 };

+struct dw_hdmi_qp_phy_ops {
+	int (*init)(struct dw_hdmi_qp *hdmi, void *data,
+		    struct drm_display_mode *mode);
+	void (*disable)(struct dw_hdmi_qp *hdmi, void *data);
+	enum drm_connector_status (*read_hpd)(struct dw_hdmi_qp *hdmi,
+					      void *data);
+	void (*update_hpd)(struct dw_hdmi_qp *hdmi, void *data,
+			   bool force, bool disabled, bool rxsense);
+	void (*setup_hpd)(struct dw_hdmi_qp *hdmi, void *data);
+	void (*set_mode)(struct dw_hdmi_qp *dw_hdmi, void *data,
+			 u32 mode_mask, bool enable);
+};
+
+struct dw_hdmi_property_ops {
+	void (*attach_properties)(struct drm_connector *connector,
+				  unsigned int color, int version,
+				  void *data);
+	void (*destroy_properties)(struct drm_connector *connector,
+				   void *data);
+	int (*set_property)(struct drm_connector *connector,
+			    struct drm_connector_state *state,
+			    struct drm_property *property,
+			    u64 val,
+			    void *data);
+	int (*get_property)(struct drm_connector *connector,
+			    const struct drm_connector_state *state,
+			    struct drm_property *property,
+			    u64 *val,
+			    void *data);
+};
+
 struct dw_hdmi_plat_data {
 	struct regmap *regm;

+	//[CC:] not in dowstream
 	unsigned int output_port;

+	unsigned long input_bus_format;
 	unsigned long input_bus_encoding;
+	unsigned int max_tmdsclk;
+	int id;
 	bool use_drm_infoframe;
 	bool ycbcr_420_allowed;
+	bool unsupported_yuv_input;
+	bool unsupported_deep_color;
+	bool is_hdmi_qp;

 	/*
 	 * Private data passed to all the .mode_valid() and .configure_phy()
@@ -139,6 +195,7 @@ struct dw_hdmi_plat_data {
 	void *priv_data;

 	/* Platform-specific mode validation (optional). */
+	//[CC:] downstream changed "struct dw_hdmi *hdmi" to "struct drm_connector *connector"
 	enum drm_mode_status (*mode_valid)(struct dw_hdmi *hdmi, void *data,
 					   const struct drm_display_info *info,
 					   const struct drm_display_mode *mode);
@@ -150,18 +207,50 @@ struct dw_hdmi_plat_data {

 	/* Vendor PHY support */
 	const struct dw_hdmi_phy_ops *phy_ops;
+	const struct dw_hdmi_qp_phy_ops *qp_phy_ops;
 	const char *phy_name;
 	void *phy_data;
 	unsigned int phy_force_vendor;

+	/* split mode */
+	bool split_mode;
+	bool first_screen;
+	struct dw_hdmi_qp *left;
+	struct dw_hdmi_qp *right;
+
 	/* Synopsys PHY support */
 	const struct dw_hdmi_mpll_config *mpll_cfg;
+	const struct dw_hdmi_mpll_config *mpll_cfg_420;
 	const struct dw_hdmi_curr_ctrl *cur_ctr;
 	const struct dw_hdmi_phy_config *phy_config;
 	int (*configure_phy)(struct dw_hdmi *hdmi, void *data,
 			     unsigned long mpixelclock);

 	unsigned int disable_cec : 1;
+
+	//[CC:] 7b29b5f29585 ("drm/rockchip: dw_hdmi: Support HDMI 2.0 YCbCr 4:2:0")
+	unsigned long (*get_input_bus_format)(void *data);
+	unsigned long (*get_output_bus_format)(void *data);
+	unsigned long (*get_enc_in_encoding)(void *data);
+	unsigned long (*get_enc_out_encoding)(void *data);
+
+	unsigned long (*get_quant_range)(void *data);
+	struct drm_property *(*get_hdr_property)(void *data);
+	struct drm_property_blob *(*get_hdr_blob)(void *data);
+	bool (*get_color_changed)(void *data);
+	int (*get_yuv422_format)(struct drm_connector *connector,
+				 struct edid *edid);
+	int (*get_edid_dsc_info)(void *data, struct edid *edid);
+	int (*get_next_hdr_data)(void *data, struct edid *edid,
+				 struct drm_connector *connector);
+	struct dw_hdmi_link_config *(*get_link_cfg)(void *data);
+	void (*set_grf_cfg)(void *data);
+	void (*convert_to_split_mode)(struct drm_display_mode *mode);
+	void (*convert_to_origin_mode)(struct drm_display_mode *mode);
+
+	/* Vendor Property support */
+	const struct dw_hdmi_property_ops *property_ops;
+	struct drm_connector *connector;
 };

 struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev,
@@ -172,6 +261,7 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev,
 			     struct drm_encoder *encoder,
 			     const struct dw_hdmi_plat_data *plat_data);

+void dw_hdmi_suspend(struct dw_hdmi *hdmi);
 void dw_hdmi_resume(struct dw_hdmi *hdmi);

 void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense);
@@ -205,6 +295,17 @@ enum drm_connector_status dw_hdmi_phy_read_hpd(struct dw_hdmi *hdmi,
 void dw_hdmi_phy_update_hpd(struct dw_hdmi *hdmi, void *data,
 			    bool force, bool disabled, bool rxsense);
 void dw_hdmi_phy_setup_hpd(struct dw_hdmi *hdmi, void *data);
+void dw_hdmi_set_quant_range(struct dw_hdmi *hdmi);
+void dw_hdmi_set_output_type(struct dw_hdmi *hdmi, u64 val);
+bool dw_hdmi_get_output_whether_hdmi(struct dw_hdmi *hdmi);
+int dw_hdmi_get_output_type_cap(struct dw_hdmi *hdmi);
+
+void dw_hdmi_qp_unbind(struct dw_hdmi_qp *hdmi);
+struct dw_hdmi_qp *dw_hdmi_qp_bind(struct platform_device *pdev,
+				struct drm_encoder *encoder,
+				struct dw_hdmi_plat_data *plat_data);
+void dw_hdmi_qp_suspend(struct device *dev, struct dw_hdmi_qp *hdmi);
+void dw_hdmi_qp_resume(struct device *dev, struct dw_hdmi_qp *hdmi);

 bool dw_hdmi_bus_fmt_is_420(struct dw_hdmi *hdmi);

--
2.42.0


From 8fff68cb7cfe1e698445896252e34f79fad41720 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Wed, 27 Mar 2024 20:36:15 +0200
Subject: [PATCH 54/69] [WIP] dt-bindings: display: rockchip-drm: Add optional
 clocks property

Allow using the clock provided by HDMI0 PHY PLL to improve HDMI output
support on RK3588 SoC.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../bindings/display/rockchip/rockchip-drm.yaml           | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip-drm.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip-drm.yaml
index a8d18a37cb23..9d000760dd6e 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip-drm.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip-drm.yaml
@@ -28,6 +28,14 @@ properties:
       of vop devices. vop definitions as defined in
       Documentation/devicetree/bindings/display/rockchip/rockchip-vop.yaml

+  clocks:
+    maxItems: 1
+    description: Optional clock provided by HDMI0 PLL
+
+  clock-names:
+    items:
+      - const: hdmi0_phy_pll
+
 required:
   - compatible
   - ports
--
2.42.0


From bf6e1b56a0577c0e3d61d6f6e138afd56578182b Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Thu, 28 Mar 2024 00:47:03 +0200
Subject: [PATCH 55/69] [WIP] dt-bindings: display: rockchip,dw-hdmi: Add
 compatible for RK3588

Document the DW HDMI TX Controller found on Rockchip RK3588 SoC.

Since RK3588 uses different clocks than previous Rockchip SoCs and also
requires a couple of reset lines and some additional properties, provide
the required changes in the binding to be able to handle all variants.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../display/bridge/synopsys,dw-hdmi.yaml      |  10 +-
 .../display/rockchip/rockchip,dw-hdmi.yaml    | 126 +++++++++++++-----
 2 files changed, 97 insertions(+), 39 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml
index 4b7e54a8f037..f69f41a8ec25 100644
--- a/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/synopsys,dw-hdmi.yaml
@@ -31,7 +31,7 @@ properties:

   clocks:
     minItems: 2
-    maxItems: 5
+    maxItems: 10
     items:
       - description: The bus clock for either AHB and APB
       - description: The internal register configuration clock
@@ -39,14 +39,12 @@ properties:

   clock-names:
     minItems: 2
-    maxItems: 5
-    items:
-      - const: iahb
-      - const: isfr
+    maxItems: 10
     additionalItems: true

   interrupts:
-    maxItems: 1
+    minItems: 1
+    maxItems: 5

 additionalProperties: true

diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
index af638b6c0d21..fadd59106d8b 100644
--- a/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,dw-hdmi.yaml
@@ -13,9 +13,6 @@ description: |
   The HDMI transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
   with a companion PHY IP.

-allOf:
-  - $ref: ../bridge/synopsys,dw-hdmi.yaml#
-
 properties:
   compatible:
     enum:
@@ -24,6 +21,7 @@ properties:
       - rockchip,rk3328-dw-hdmi
       - rockchip,rk3399-dw-hdmi
       - rockchip,rk3568-dw-hdmi
+      - rockchip,rk3588-dw-hdmi

   reg-io-width:
     const: 4
@@ -39,36 +37,6 @@ properties:
       A 1.8V supply that powers up the SoC internal circuitry. The pin name on the
       SoC usually is HDMI_TX_AVDD_1V8.

-  clocks:
-    minItems: 2
-    items:
-      - {}
-      - {}
-      # The next three clocks are all optional, but shall be specified in this
-      # order when present.
-      - description: The HDMI CEC controller main clock
-      - description: Power for GRF IO
-      - description: External clock for some HDMI PHY (old clock name, deprecated)
-      - description: External clock for some HDMI PHY (new name)
-
-  clock-names:
-    minItems: 2
-    items:
-      - {}
-      - {}
-      - enum:
-          - cec
-          - grf
-          - vpll
-          - ref
-      - enum:
-          - grf
-          - vpll
-          - ref
-      - enum:
-          - vpll
-          - ref
-
   ddc-i2c-bus:
     $ref: /schemas/types.yaml#/definitions/phandle
     description:
@@ -134,6 +102,98 @@ required:
   - ports
   - rockchip,grf

+allOf:
+  - $ref: ../bridge/synopsys,dw-hdmi.yaml#
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - rockchip,rk3228-dw-hdmi
+              - rockchip,rk3288-dw-hdmi
+              - rockchip,rk3328-dw-hdmi
+              - rockchip,rk3399-dw-hdmi
+              - rockchip,rk3568-dw-hdmi
+    then:
+      properties:
+        clocks:
+          minItems: 2
+          items:
+            - {}
+            - {}
+            # The next three clocks are all optional, but shall be specified in this
+            # order when present.
+            - description: The HDMI CEC controller main clock
+            - description: Power for GRF IO
+            - description: External clock for some HDMI PHY (old clock name, deprecated)
+            - description: External clock for some HDMI PHY (new name)
+
+        clock-names:
+          minItems: 2
+          items:
+            - const: iahb
+            - const: isfr
+            - enum:
+                - cec
+                - grf
+                - vpll
+                - ref
+            - enum:
+                - grf
+                - vpll
+                - ref
+            - enum:
+                - vpll
+                - ref
+
+        interrupts:
+          maxItems: 1
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - rockchip,rk3588-dw-hdmi
+    then:
+      properties:
+        clocks:
+          minItems: 10
+          maxItems: 10
+
+        clock-names:
+          items:
+            - const: pclk
+            - const: hpd
+            - const: earc
+            - const: hdmitx_ref
+            - const: aud
+            - const: dclk_vp0
+            - const: dclk_vp1
+            - const: dclk_vp2
+            - const: dclk_vp3
+            - const: hclk_vo1
+
+        resets:
+          minItems: 2
+          maxItems: 2
+
+        reset-names:
+          items:
+            - const: ref
+            - const: hdp
+
+        rockchip,vo1_grf:
+          $ref: /schemas/types.yaml#/definitions/phandle
+          description:
+            phandle to the VO1 GRF
+
+      required:
+        - resets
+        - reset-names
+        - rockchip,vo1_grf
+
 unevaluatedProperties: false

 examples:
--
2.42.0


From 2eff5b5149f34d69e3279a1733c6b95247a9217b Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Mon, 15 Jan 2024 22:47:41 +0200
Subject: [PATCH 56/69] arm64: dts: rockchip: Add HDMI0 bridge to rk3588

Add DT node for the HDMI0 bridge found on RK3588 SoC.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 55 +++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index d0d47ac2e3cc..8618b7919e70 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -1464,6 +1464,61 @@ i2s9_8ch: i2s@fddfc000 {
 		status = "disabled";
 	};

+	hdmi0: hdmi@fde80000 {
+		compatible = "rockchip,rk3588-dw-hdmi";
+		reg = <0x0 0xfde80000 0x0 0x20000>;
+		interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 170 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 172 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 360 IRQ_TYPE_LEVEL_HIGH 0>;
+		clocks = <&cru PCLK_HDMITX0>,
+			 <&cru CLK_HDMIHDP0>,
+			 <&cru CLK_HDMITX0_EARC>,
+			 <&cru CLK_HDMITX0_REF>,
+			 <&cru MCLK_I2S5_8CH_TX>,
+			 <&cru DCLK_VOP0>,
+			 <&cru DCLK_VOP1>,
+			 <&cru DCLK_VOP2>,
+			 <&cru DCLK_VOP3>,
+			 <&cru HCLK_VO1>;
+		clock-names = "pclk",
+			      "hpd",
+			      "earc",
+			      "hdmitx_ref",
+			      "aud",
+			      "dclk_vp0",
+			      "dclk_vp1",
+			      "dclk_vp2",
+			      "dclk_vp3",
+			      "hclk_vo1";
+		resets = <&cru SRST_HDMITX0_REF>, <&cru SRST_HDMIHDP0>;
+		reset-names = "ref", "hdp";
+		power-domains = <&power RK3588_PD_VO1>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&hdmim0_tx0_cec &hdmim0_tx0_hpd
+			     &hdmim0_tx0_scl &hdmim0_tx0_sda>;
+		reg-io-width = <4>;
+		rockchip,grf = <&sys_grf>;
+		rockchip,vo1_grf = <&vo1_grf>;
+		phys = <&hdptxphy_hdmi0>;
+		phy-names = "hdmi";
+		status = "disabled";
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			hdmi0_in: port@0 {
+				reg = <0>;
+			};
+
+			hdmi0_out: port@1 {
+				reg = <1>;
+			};
+		};
+	};
+
 	qos_gpu_m0: qos@fdf35000 {
 		compatible = "rockchip,rk3588-qos", "syscon";
 		reg = <0x0 0xfdf35000 0x0 0x20>;
--
2.42.0


From acd56fb6791f30483dbbaf6f40df514004b49e32 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Mon, 15 Jan 2024 22:51:17 +0200
Subject: [PATCH 57/69] arm64: dts: rockchip: Enable HDMI0 on rock-5b

Add the necessary DT changes to enable HDMI0 on Rock 5B.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../boot/dts/rockchip/rk3588-rock-5b.dts      | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index fa3b91613a24..467ef01107ac 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -4,6 +4,7 @@

 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/leds/common.h>
+#include <dt-bindings/soc/rockchip,vop2.h>
 #include <dt-bindings/usb/pd.h>
 #include "rk3588.dtsi"

@@ -196,6 +197,20 @@ &gpu {
 	status = "okay";
 };

+&hdmi0 {
+	status = "okay";
+};
+
+&hdmi0_in {
+	hdmi0_in_vp0: endpoint {
+		remote-endpoint = <&vp0_out_hdmi0>;
+	};
+};
+
+&hdptxphy_hdmi0 {
+	status = "okay";
+};
+
 &i2c0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0m2_xfer>;
@@ -945,3 +960,18 @@ &usb_host1_xhci {
 &usb_host2_xhci {
 	status = "okay";
 };
+
+&vop_mmu {
+	status = "okay";
+};
+
+&vop {
+	status = "okay";
+};
+
+&vp0 {
+	vp0_out_hdmi0: endpoint@ROCKCHIP_VOP2_EP_HDMI0 {
+		reg = <ROCKCHIP_VOP2_EP_HDMI0>;
+		remote-endpoint = <&hdmi0_in_vp0>;
+	};
+};
--
2.42.0


From 69e74b8a9206571d8174d3260bcbc81e7f92d729 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Wed, 17 Jan 2024 01:53:38 +0200
Subject: [PATCH 58/69] arm64: dts: rockchip: Enable HDMI0 on rk3588-evb1

Add the necessary DT changes to enable HDMI0 on Rockchip RK3588 EVB1.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 .../boot/dts/rockchip/rk3588-evb1-v10.dts     | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index f76748f2136c..859c86fccf16 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -9,6 +9,7 @@
 #include <dt-bindings/gpio/gpio.h>
 #include <dt-bindings/input/input.h>
 #include <dt-bindings/pinctrl/rockchip.h>
+#include <dt-bindings/soc/rockchip,vop2.h>
 #include <dt-bindings/usb/pd.h>
 #include "rk3588.dtsi"

@@ -342,6 +343,20 @@ &gpu {
 	status = "okay";
 };

+&hdmi0 {
+	status = "okay";
+};
+
+&hdmi0_in {
+	hdmi0_in_vp0: endpoint {
+		remote-endpoint = <&vp0_out_hdmi0>;
+	};
+};
+
+&hdptxphy_hdmi0 {
+	status = "okay";
+};
+
 &i2c2 {
 	status = "okay";

@@ -1332,3 +1347,18 @@ &usb_host1_xhci {
 	dr_mode = "host";
 	status = "okay";
 };
+
+&vop_mmu {
+	status = "okay";
+};
+
+&vop {
+	status = "okay";
+};
+
+&vp0 {
+	vp0_out_hdmi0: endpoint@ROCKCHIP_VOP2_EP_HDMI0 {
+		reg = <ROCKCHIP_VOP2_EP_HDMI0>;
+		remote-endpoint = <&hdmi0_in_vp0>;
+	};
+};
--
2.42.0


From d158fef0facb56049114850385b24b23c33fd72a Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Tue, 16 Jan 2024 03:13:38 +0200
Subject: [PATCH 59/69] arm64: dts: rockchip: Enable HDMI0 PHY clk provider on
 rk3588

The HDMI0 PHY can be used as a clock provider on RK3588, hence add the
missing #clock-cells property.
---
 arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
index 8618b7919e70..afba0bb98119 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi
@@ -2926,6 +2926,7 @@ hdptxphy_hdmi0: phy@fed60000 {
 		reg = <0x0 0xfed60000 0x0 0x2000>;
 		clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>;
 		clock-names = "ref", "apb";
+		#clock-cells = <0>;
 		#phy-cells = <0>;
 		resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>,
 			 <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>,
--
2.42.0


From 32da012ea234e13b5d13d003eac5f98e68792bb9 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Fri, 3 Nov 2023 20:05:05 +0200
Subject: [PATCH 60/69] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on
 rock-5b

The initial vop2 support for rk3588 in mainline is not able to handle
all display modes supported by connected displays, e.g.
2560x1440-75.00Hz, 2048x1152-60.00Hz, 1024x768-60.00Hz.

Additionally, it doesn't cope with non-integer refresh rates like 59.94,
29.97, 23.98, etc.

Make use of the HDMI0 PHY PLL to support the additional display modes.

Note this requires commit "drm/rockchip: vop2: Improve display modes
handling on rk3588", which needs a rework to be upstreamable.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index 467ef01107ac..07dd3a9d337d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -192,6 +192,11 @@ &cpu_l3 {
 	cpu-supply = <&vdd_cpu_lit_s0>;
 };

+&display_subsystem {
+	clocks = <&hdptxphy_hdmi0>;
+	clock-names = "hdmi0_phy_pll";
+};
+
 &gpu {
 	mali-supply = <&vdd_gpu_s0>;
 	status = "okay";
--
2.42.0


From 53f231d5a82308c1588c7b4281d5edc5820f96c4 Mon Sep 17 00:00:00 2001
From: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
Date: Wed, 17 Jan 2024 02:00:41 +0200
Subject: [PATCH 61/69] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on
 rk3588-evb1

The initial vop2 support for rk3588 in mainline is not able to handle
all display modes supported by connected displays, e.g.
2560x1440-75.00Hz, 2048x1152-60.00Hz, 1024x768-60.00Hz.

Additionally, it doesn't cope with non-integer refresh rates like 59.94,
29.97, 23.98, etc.

Make use of the HDMI0 PHY PLL to support the additional display modes.

Note this requires commit "drm/rockchip: vop2: Improve display modes
handling on rk3588", which needs a rework to be upstreamable.

Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
index 859c86fccf16..c9ad921f3c28 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts
@@ -322,6 +322,11 @@ &cpu_l3 {
 	cpu-supply = <&vdd_cpu_lit_s0>;
 };

+&display_subsystem {
+	clocks = <&hdptxphy_hdmi0>;
+	clock-names = "hdmi0_phy_pll";
+};
+
 &gmac0 {
 	clock_in_out = "output";
 	phy-handle = <&rgmii_phy>;
--
2.42.0


From 26a101fd68fef930acb49e281ce4c328704918b4 Mon Sep 17 00:00:00 2001
From: "Carsten Haitzler (Rasterman)" <raster@rasterman.com>
Date: Tue, 6 Feb 2024 10:12:54 +0000
Subject: [PATCH 62/69] arm64: dts: rockchip: Slow down EMMC a bit to keep IO
 stable

This drops to hs200 mode and 150Mhz as this is actually stable across
eMMC modules. There exist some that are incompatible at higher rates
with the rk3588 and to avoid your filesystem corrupting due to IO
errors, be more conservative and reduce the max. speed.

Signed-off-by: Carsten Haitzler <raster@rasterman.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index 07dd3a9d337d..dc3567a7c9ee 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -489,8 +489,8 @@ &sdhci {
 	no-sdio;
 	no-sd;
 	non-removable;
-	mmc-hs400-1_8v;
-	mmc-hs400-enhanced-strobe;
+	max-frequency = <150000000>;
+	mmc-hs200-1_8v;
 	status = "okay";
 };

--
2.42.0


From 9ad1e64bf23485d6a9cfb1dc1471087ec8951a07 Mon Sep 17 00:00:00 2001
From: Shreeya Patel <shreeya.patel@collabora.com>
Date: Wed, 14 Jun 2023 15:06:37 +0530
Subject: [PATCH 63/69] clk: rockchip: rst-rk3588: Add BIU reset

Export hdmirx_biu soft reset id which is required by the hdmirx controller.

Signed-off-by: Shreeya Patel <shreeya.patel@collabora.com>
---
 drivers/clk/rockchip/rst-rk3588.c               | 1 +
 include/dt-bindings/reset/rockchip,rk3588-cru.h | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/clk/rockchip/rst-rk3588.c b/drivers/clk/rockchip/rst-rk3588.c
index e855bb8d5413..c4ebc01f1c9c 100644
--- a/drivers/clk/rockchip/rst-rk3588.c
+++ b/drivers/clk/rockchip/rst-rk3588.c
@@ -577,6 +577,7 @@ static const int rk3588_register_offset[] = {

 	/* SOFTRST_CON59 */
 	RK3588_CRU_RESET_OFFSET(SRST_A_HDCP1_BIU, 59, 6),
+	RK3588_CRU_RESET_OFFSET(SRST_A_HDMIRX_BIU, 59, 7),
 	RK3588_CRU_RESET_OFFSET(SRST_A_VO1_BIU, 59, 8),
 	RK3588_CRU_RESET_OFFSET(SRST_H_VOP1_BIU, 59, 9),
 	RK3588_CRU_RESET_OFFSET(SRST_H_VOP1_S_BIU, 59, 10),
diff --git a/include/dt-bindings/reset/rockchip,rk3588-cru.h b/include/dt-bindings/reset/rockchip,rk3588-cru.h
index d4264db2a07f..e2fe4bd5f7f0 100644
--- a/include/dt-bindings/reset/rockchip,rk3588-cru.h
+++ b/include/dt-bindings/reset/rockchip,rk3588-cru.h
@@ -751,4 +751,6 @@
 #define SRST_P_TRNG_CHK			658
 #define SRST_TRNG_S			659

+#define SRST_A_HDMIRX_BIU		660
+
 #endif
--
2.42.0


From 6523d3d842443511e4cfb6c0678e3058d0fefa14 Mon Sep 17 00:00:00 2001
From: Shreeya Patel <shreeya.patel@collabora.com>
Date: Wed, 20 Dec 2023 18:30:13 +0530
Subject: [PATCH 64/69] dt-bindings: media: Document bindings for HDMI RX
 Controller

Document bindings for the Synopsys DesignWare HDMI RX Controller.

Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Shreeya Patel <shreeya.patel@collabora.com>
---
 .../bindings/media/snps,dw-hdmi-rx.yaml       | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)

diff --git a/Documentation/devicetree/bindings/media/snps,dw-hdmi-rx.yaml b/Documentation/devicetree/bindings/media/snps,dw-hdmi-rx.yaml
new file mode 100644
index 000000000000..903aa62d33ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/snps,dw-hdmi-rx.yaml
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-3.0 OR BSD-2-Clause)
+# Device Tree bindings for Synopsys DesignWare HDMI RX Controller
+
+---
+$id: http://devicetree.org/schemas/media/snps,dw-hdmi-rx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare HDMI RX Controller
+
+maintainers:
+  - Shreeya Patel <shreeya.patel@collabora.com>
+
+properties:
+  compatible:
+    items:
+      - const: rockchip,rk3588-hdmirx-ctrler
+      - const: snps,dw-hdmi-rx
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 3
+
+  interrupt-names:
+    items:
+      - const: cec
+      - const: hdmi
+      - const: dma
+
+  clocks:
+    maxItems: 7
+
+  clock-names:
+    items:
+      - const: aclk
+      - const: audio
+      - const: cr_para
+      - const: pclk
+      - const: ref
+      - const: hclk_s_hdmirx
+      - const: hclk_vo1
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 4
+
+  reset-names:
+    items:
+      - const: rst_a
+      - const: rst_p
+      - const: rst_ref
+      - const: rst_biu
+
+  pinctrl-names:
+    const: default
+
+  memory-region:
+    maxItems: 1
+
+  hdmirx-5v-detection-gpios:
+    description: GPIO specifier for 5V detection.
+    maxItems: 1
+
+  rockchip,grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      The phandle of the syscon node for the GRF register.
+
+  rockchip,vo1_grf:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      The phandle of the syscon node for the VO1 GRF register.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - pinctrl-0
+  - pinctrl-names
+  - hdmirx-5v-detection-gpios
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/rockchip,rk3588-cru.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/power/rk3588-power.h>
+    #include <dt-bindings/reset/rockchip,rk3588-cru.h>
+    #include <dt-bindings/pinctrl/rockchip.h>
+    #include <dt-bindings/gpio/gpio.h>
+    hdmirx_ctrler: hdmirx-controller@fdee0000 {
+      compatible = "rockchip,rk3588-hdmirx-ctrler", "snps,dw-hdmi-rx";
+      reg = <0xfdee0000 0x6000>;
+      interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH 0>,
+                   <GIC_SPI 436 IRQ_TYPE_LEVEL_HIGH 0>,
+                   <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH 0>;
+      interrupt-names = "cec", "hdmi", "dma";
+      clocks = <&cru ACLK_HDMIRX>,
+               <&cru CLK_HDMIRX_AUD>,
+               <&cru CLK_CR_PARA>,
+               <&cru PCLK_HDMIRX>,
+               <&cru CLK_HDMIRX_REF>,
+               <&cru PCLK_S_HDMIRX>,
+               <&cru HCLK_VO1>;
+      clock-names = "aclk",
+                    "audio",
+                    "cr_para",
+                    "pclk",
+                    "ref",
+                    "hclk_s_hdmirx",
+                    "hclk_vo1";
+      power-domains = <&power RK3588_PD_VO1>;
+      resets = <&cru SRST_A_HDMIRX>, <&cru SRST_P_HDMIRX>,
+               <&cru SRST_HDMIRX_REF>, <&cru SRST_A_HDMIRX_BIU>;
+      reset-names = "rst_a", "rst_p", "rst_ref", "rst_biu";
+      pinctrl-0 = <&hdmim1_rx_cec &hdmim1_rx_hpdin &hdmim1_rx_scl &hdmim1_rx_sda &hdmirx_5v_detection>;
+      pinctrl-names = "default";
+      memory-region = <&hdmirx_cma>;
+      hdmirx-5v-detection-gpios = <&gpio1 RK_PC6 GPIO_ACTIVE_LOW>;
+    };
--
2.42.0


From eab397703aa8482ddcc31719a5ad5c57efc248f8 Mon Sep 17 00:00:00 2001
From: Shreeya Patel <shreeya.patel@collabora.com>
Date: Wed, 20 Dec 2023 16:50:14 +0530
Subject: [PATCH 65/69] arm64: dts: rockchip: Add device tree support for HDMI
 RX Controller

Add device tree support for Synopsys DesignWare HDMI RX
Controller.

Signed-off-by: Dingxian Wen <shawn.wen@rock-chips.com>
Co-developed-by: Shreeya Patel <shreeya.patel@collabora.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Shreeya Patel <shreeya.patel@collabora.com>
---
 .../boot/dts/rockchip/rk3588-pinctrl.dtsi     | 41 +++++++++++++++
 .../boot/dts/rockchip/rk3588-rock-5b.dts      | 18 +++++++
 arch/arm64/boot/dts/rockchip/rk3588.dtsi      | 50 +++++++++++++++++++
 3 files changed, 109 insertions(+)

diff --git a/arch/arm64/boot/dts/rockchip/rk3588-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-pinctrl.dtsi
index 244c66faa161..e5f3d0acbd55 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-pinctrl.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588-pinctrl.dtsi
@@ -169,6 +169,47 @@ hdmim0_tx1_sda: hdmim0-tx1-sda {
 				/* hdmim0_tx1_sda */
 				<2 RK_PB4 4 &pcfg_pull_none>;
 		};
+
+		/omit-if-no-ref/
+		hdmim1_rx: hdmim1-rx {
+			rockchip,pins =
+                                /* hdmim1_rx_cec */
+                                <3 RK_PD1 5 &pcfg_pull_none>,
+                                /* hdmim1_rx_scl */
+                                <3 RK_PD2 5 &pcfg_pull_none_smt>,
+                                /* hdmim1_rx_sda */
+                                <3 RK_PD3 5 &pcfg_pull_none_smt>,
+                                /* hdmim1_rx_hpdin */
+                                <3 RK_PD4 5 &pcfg_pull_none>;
+                };
+
+                /omit-if-no-ref/
+                hdmim1_rx_cec: hdmim1-rx-cec {
+                        rockchip,pins =
+                                /* hdmim1_rx_cec */
+                                <3 RK_PD1 5 &pcfg_pull_none>;
+                };
+
+                /omit-if-no-ref/
+                hdmim1_rx_hpdin: hdmim1-rx-hpdin {
+                        rockchip,pins =
+                                /* hdmim1_rx_hpdin */
+                                <3 RK_PD4 5 &pcfg_pull_none>;
+                };
+
+                /omit-if-no-ref/
+                hdmim1_rx_scl: hdmim1-rx-scl {
+                        rockchip,pins =
+                                /* hdmim1_rx_scl */
+                                <3 RK_PD2 5 &pcfg_pull_none>;
+                };
+
+                /omit-if-no-ref/
+                hdmim1_rx_sda: hdmim1-rx-sda {
+                        rockchip,pins =
+                                /* hdmim1_rx_sda */
+                                <3 RK_PD3 5 &pcfg_pull_none>;
+                };
 	};

 	i2c0 {
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index dc3567a7c9ee..9986316f283e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -216,6 +216,18 @@ &hdptxphy_hdmi0 {
 	status = "okay";
 };

+&hdmirx_cma {
+	status = "okay";
+};
+
+&hdmirx_ctrler {
+	status = "okay";
+	hdmirx-5v-detection-gpios = <&gpio1 RK_PC6 GPIO_ACTIVE_LOW>;
+	pinctrl-0 = <&hdmim1_rx_cec &hdmim1_rx_hpdin &hdmim1_rx_scl &hdmim1_rx_sda &hdmirx_5v_detection>;
+	pinctrl-names = "default";
+	memory-region = <&hdmirx_cma>;
+};
+
 &i2c0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&i2c0m2_xfer>;
@@ -422,6 +434,12 @@ &pcie3x4 {
 };

 &pinctrl {
+	hdmirx {
+		hdmirx_5v_detection: hdmirx-5v-detection {
+			rockchip,pins = <1 RK_PC6 RK_FUNC_GPIO &pcfg_pull_none>;
+		};
+	};
+
 	hym8563 {
 		hym8563_int: hym8563-int {
 			rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588.dtsi b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
index 5984016b5f96..534c42262c73 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3588.dtsi
@@ -7,6 +7,24 @@
 #include "rk3588-pinctrl.dtsi"

 / {
+	reserved-memory {
+		#address-cells = <2>;
+		#size-cells = <2>;
+		ranges;
+		/*
+		 * The 4k HDMI capture controller works only with 32bit
+		 * phys addresses and doesn't support IOMMU. HDMI RX CMA
+		 * must be reserved below 4GB.
+		 */
+		hdmirx_cma: hdmirx_cma {
+			compatible = "shared-dma-pool";
+			alloc-ranges = <0x0 0x0 0x0 0xffffffff>;
+			size = <0x0 (160 * 0x100000)>; /* 160MiB */
+			no-map;
+			status = "disabled";
+		};
+	};
+
 	usb_host1_xhci: usb@fc400000 {
 		compatible = "rockchip,rk3588-dwc3", "snps,dwc3";
 		reg = <0x0 0xfc400000 0x0 0x400000>;
@@ -135,6 +153,38 @@ i2s10_8ch: i2s@fde00000 {
 		status = "disabled";
 	};

+	hdmirx_ctrler: hdmirx-controller@fdee0000 {
+		compatible = "rockchip,rk3588-hdmirx-ctrler", "snps,dw-hdmi-rx";
+		reg = <0x0 0xfdee0000 0x0 0x6000>;
+		power-domains = <&power RK3588_PD_VO1>;
+		rockchip,grf = <&sys_grf>;
+		rockchip,vo1_grf = <&vo1_grf>;
+		interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 436 IRQ_TYPE_LEVEL_HIGH 0>,
+			     <GIC_SPI 179 IRQ_TYPE_LEVEL_HIGH 0>;
+		interrupt-names = "cec", "hdmi", "dma";
+		clocks = <&cru ACLK_HDMIRX>,
+			 <&cru CLK_HDMIRX_AUD>,
+			 <&cru CLK_CR_PARA>,
+			 <&cru PCLK_HDMIRX>,
+			 <&cru CLK_HDMIRX_REF>,
+			 <&cru PCLK_S_HDMIRX>,
+			 <&cru HCLK_VO1>;
+		clock-names = "aclk",
+			      "audio",
+			      "cr_para",
+			      "pclk",
+			      "ref",
+			      "hclk_s_hdmirx",
+			      "hclk_vo1";
+		resets = <&cru SRST_A_HDMIRX>, <&cru SRST_P_HDMIRX>,
+			 <&cru SRST_HDMIRX_REF>, <&cru SRST_A_HDMIRX_BIU>;
+		reset-names = "rst_a", "rst_p", "rst_ref", "rst_biu";
+		pinctrl-0 = <&hdmim1_rx>;
+		pinctrl-names = "default";
+		status = "disabled";
+	};
+
 	pcie3x4: pcie@fe150000 {
 		compatible = "rockchip,rk3588-pcie", "rockchip,rk3568-pcie";
 		#address-cells = <3>;
--
2.42.0


From 68d263c9c7eb2af36574059c7c1ecc45662b1de6 Mon Sep 17 00:00:00 2001
From: Shreeya Patel <shreeya.patel@collabora.com>
Date: Wed, 20 Dec 2023 16:52:01 +0530
Subject: [PATCH 66/69] media: platform: synopsys: Add support for hdmi input
 driver

Add initial support for the Synopsys DesignWare HDMI RX
Controller Driver used by Rockchip RK3588. The driver
supports:
 - HDMI 1.4b and 2.0 modes (HDMI 4k@60Hz)
 - RGB888, YUV422, YUV444 and YCC420 pixel formats
 - CEC
 - EDID configuration

The hardware also has Audio and HDCP capabilities, but these are
not yet supported by the driver.

Signed-off-by: Dingxian Wen <shawn.wen@rock-chips.com>
Co-developed-by: Shreeya Patel <shreeya.patel@collabora.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Tested-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Shreeya Patel <shreeya.patel@collabora.com>
---
 drivers/media/platform/Kconfig                |    1 +
 drivers/media/platform/Makefile               |    1 +
 drivers/media/platform/synopsys/Kconfig       |    3 +
 drivers/media/platform/synopsys/Makefile      |    2 +
 .../media/platform/synopsys/hdmirx/Kconfig    |   18 +
 .../media/platform/synopsys/hdmirx/Makefile   |    4 +
 .../platform/synopsys/hdmirx/snps_hdmirx.c    | 2856 +++++++++++++++++
 .../platform/synopsys/hdmirx/snps_hdmirx.h    |  394 +++
 .../synopsys/hdmirx/snps_hdmirx_cec.c         |  289 ++
 .../synopsys/hdmirx/snps_hdmirx_cec.h         |   46 +
 10 files changed, 3614 insertions(+)

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 91e54215de3a..2f5a9a4fa970 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -82,6 +82,7 @@ source "drivers/media/platform/rockchip/Kconfig"
 source "drivers/media/platform/samsung/Kconfig"
 source "drivers/media/platform/st/Kconfig"
 source "drivers/media/platform/sunxi/Kconfig"
+source "drivers/media/platform/synopsys/Kconfig"
 source "drivers/media/platform/ti/Kconfig"
 source "drivers/media/platform/verisilicon/Kconfig"
 source "drivers/media/platform/via/Kconfig"
diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index 3296ec1ebe16..de77c876f58a 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile
@@ -25,6 +25,7 @@ obj-y += rockchip/
 obj-y += samsung/
 obj-y += st/
 obj-y += sunxi/
+obj-y += synopsys/
 obj-y += ti/
 obj-y += verisilicon/
 obj-y += via/
diff --git a/drivers/media/platform/synopsys/Kconfig b/drivers/media/platform/synopsys/Kconfig
new file mode 100644
index 000000000000..4fd521f78425
--- /dev/null
+++ b/drivers/media/platform/synopsys/Kconfig
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+source "drivers/media/platform/synopsys/hdmirx/Kconfig"
diff --git a/drivers/media/platform/synopsys/Makefile b/drivers/media/platform/synopsys/Makefile
new file mode 100644
index 000000000000..3b12c574dd67
--- /dev/null
+++ b/drivers/media/platform/synopsys/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-y += hdmirx/
diff --git a/drivers/media/platform/synopsys/hdmirx/Kconfig b/drivers/media/platform/synopsys/hdmirx/Kconfig
new file mode 100644
index 000000000000..adcdb7c2ed79
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/Kconfig
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config VIDEO_SYNOPSYS_HDMIRX
+	tristate "Synopsys DesignWare HDMI Receiver driver"
+	depends on VIDEO_DEV
+	depends on ARCH_ROCKCHIP
+	select MEDIA_CONTROLLER
+	select VIDEO_V4L2_SUBDEV_API
+	select VIDEOBUF2_DMA_CONTIG
+	select CEC_CORE
+	select CEC_NOTIFIER
+	select HDMI
+	help
+	  Support for Synopsys HDMI HDMI RX Controller.
+	  This driver supports HDMI 2.0 version.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called synopsys_hdmirx.
diff --git a/drivers/media/platform/synopsys/hdmirx/Makefile b/drivers/media/platform/synopsys/hdmirx/Makefile
new file mode 100644
index 000000000000..2fa2d9e25300
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+synopsys-hdmirx-objs := snps_hdmirx.o snps_hdmirx_cec.o
+
+obj-$(CONFIG_VIDEO_SYNOPSYS_HDMIRX) += synopsys-hdmirx.o
diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c
new file mode 100644
index 000000000000..63a38ee089ec
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.c
@@ -0,0 +1,2856 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Collabora, Ltd.
+ * Author: Shreeya Patel <shreeya.patel@collabora.com>
+ *
+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd.
+ * Author: Dingxian Wen <shawn.wen@rock-chips.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/gpio/consumer.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/v4l2-dv-timings.h>
+#include <linux/workqueue.h>
+
+#include <media/cec.h>
+#include <media/cec-notifier.h>
+#include <media/v4l2-common.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-dv-timings.h>
+#include <media/v4l2-event.h>
+#include <media/v4l2-fh.h>
+#include <media/v4l2-ioctl.h>
+#include <media/videobuf2-dma-contig.h>
+#include <media/videobuf2-v4l2.h>
+
+#include <sound/hdmi-codec.h>
+
+#include "snps_hdmirx.h"
+#include "snps_hdmirx_cec.h"
+
+static int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug level (0-3)");
+
+#define EDID_NUM_BLOCKS_MAX			2
+#define EDID_BLOCK_SIZE				128
+#define HDMIRX_STORED_BIT_WIDTH			8
+#define IREF_CLK_FREQ_HZ			428571429
+#define MEMORY_ALIGN_ROUND_UP_BYTES		64
+#define HDMIRX_PLANE_Y				0
+#define HDMIRX_PLANE_CBCR			1
+#define RK_IRQ_HDMIRX_HDMI			210
+#define FILTER_FRAME_CNT			6
+#define RK_SIP_FIQ_CTRL				0x82000024
+#define SIP_WDT_CFG				0x82000026
+#define DETECTION_THRESHOLD			7
+
+/* fiq control sub func */
+enum {
+	RK_SIP_FIQ_CTRL_FIQ_EN = 1,
+	RK_SIP_FIQ_CTRL_FIQ_DIS,
+	RK_SIP_FIQ_CTRL_SET_AFF
+};
+
+/* SIP_WDT_CONFIG call types  */
+enum {
+	WDT_START = 0,
+	WDT_STOP = 1,
+	WDT_PING = 2,
+};
+
+enum hdmirx_pix_fmt {
+	HDMIRX_RGB888 = 0,
+	HDMIRX_YUV422 = 1,
+	HDMIRX_YUV444 = 2,
+	HDMIRX_YUV420 = 3,
+};
+
+enum ddr_store_fmt {
+	STORE_RGB888 = 0,
+	STORE_RGBA_ARGB,
+	STORE_YUV420_8BIT,
+	STORE_YUV420_10BIT,
+	STORE_YUV422_8BIT,
+	STORE_YUV422_10BIT,
+	STORE_YUV444_8BIT,
+	STORE_YUV420_16BIT = 8,
+	STORE_YUV422_16BIT = 9,
+};
+
+enum hdmirx_reg_attr {
+	HDMIRX_ATTR_RW = 0,
+	HDMIRX_ATTR_RO = 1,
+	HDMIRX_ATTR_WO = 2,
+	HDMIRX_ATTR_RE = 3,
+};
+
+enum hdmirx_edid_version {
+	HDMIRX_EDID_USER = 0,
+	HDMIRX_EDID_340M = 1,
+	HDMIRX_EDID_600M = 2,
+};
+
+enum {
+	HDMIRX_RST_A,
+	HDMIRX_RST_P,
+	HDMIRX_RST_REF,
+	HDMIRX_RST_BIU,
+	HDMIRX_NUM_RST,
+};
+
+static const char * const pix_fmt_str[] = {
+	"RGB888",
+	"YUV422",
+	"YUV444",
+	"YUV420",
+};
+
+struct hdmirx_buffer {
+	struct vb2_v4l2_buffer vb;
+	struct list_head queue;
+	u32 buff_addr[VIDEO_MAX_PLANES];
+};
+
+struct hdmirx_output_fmt {
+	u32 fourcc;
+	u8 cplanes;
+	u8 mplanes;
+	u8 bpp[VIDEO_MAX_PLANES];
+};
+
+struct hdmirx_stream {
+	struct snps_hdmirx_dev *hdmirx_dev;
+	struct video_device vdev;
+	struct vb2_queue buf_queue;
+	struct list_head buf_head;
+	struct hdmirx_buffer *curr_buf;
+	struct hdmirx_buffer *next_buf;
+	struct v4l2_pix_format_mplane pixm;
+	const struct hdmirx_output_fmt *out_fmt;
+	struct mutex vlock;
+	spinlock_t vbq_lock;
+	bool stopping;
+	wait_queue_head_t wq_stopped;
+	u32 frame_idx;
+	u32 line_flag_int_cnt;
+	u32 irq_stat;
+};
+
+struct snps_hdmirx_dev {
+	struct device *dev;
+	struct device *codec_dev;
+	struct hdmirx_stream stream;
+	struct v4l2_device v4l2_dev;
+	struct v4l2_ctrl_handler hdl;
+	struct v4l2_ctrl *detect_tx_5v_ctrl;
+	struct v4l2_dv_timings timings;
+	struct gpio_desc *detect_5v_gpio;
+	struct work_struct work_wdt_config;
+	struct delayed_work delayed_work_hotplug;
+	struct delayed_work delayed_work_res_change;
+	struct delayed_work delayed_work_heartbeat;
+	struct cec_notifier *cec_notifier;
+	struct hdmirx_cec *cec;
+	struct mutex stream_lock;
+	struct mutex work_lock;
+	struct reset_control_bulk_data resets[HDMIRX_NUM_RST];
+	struct clk_bulk_data *clks;
+	struct regmap *grf;
+	struct regmap *vo1_grf;
+	struct completion cr_write_done;
+	struct completion timer_base_lock;
+	struct completion avi_pkt_rcv;
+	enum hdmirx_edid_version edid_version;
+	enum hdmirx_pix_fmt pix_fmt;
+	void __iomem *regs;
+	int hdmi_irq;
+	int dma_irq;
+	int det_irq;
+	bool hpd_trigger_level;
+	bool tmds_clk_ratio;
+	bool is_dvi_mode;
+	bool got_timing;
+	u32 num_clks;
+	u32 edid_blocks_written;
+	u32 cur_vic;
+	u32 cur_fmt_fourcc;
+	u32 color_depth;
+	u8 edid[EDID_BLOCK_SIZE * 2];
+	hdmi_codec_plugged_cb plugged_cb;
+	spinlock_t rst_lock;
+};
+
+static u8 edid_init_data_340M[] = {
+	0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+	0x49, 0x70, 0x88, 0x35, 0x01, 0x00, 0x00, 0x00,
+	0x2D, 0x1F, 0x01, 0x03, 0x80, 0x78, 0x44, 0x78,
+	0x0A, 0xCF, 0x74, 0xA3, 0x57, 0x4C, 0xB0, 0x23,
+	0x09, 0x48, 0x4C, 0x21, 0x08, 0x00, 0x61, 0x40,
+	0x01, 0x01, 0x81, 0x00, 0x95, 0x00, 0xA9, 0xC0,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x3A,
+	0x80, 0x18, 0x71, 0x38, 0x2D, 0x40, 0x58, 0x2C,
+	0x45, 0x00, 0x20, 0xC2, 0x31, 0x00, 0x00, 0x1E,
+	0x01, 0x1D, 0x00, 0x72, 0x51, 0xD0, 0x1E, 0x20,
+	0x6E, 0x28, 0x55, 0x00, 0x20, 0xC2, 0x31, 0x00,
+	0x00, 0x1E, 0x00, 0x00, 0x00, 0xFC, 0x00, 0x52,
+	0x4B, 0x2D, 0x55, 0x48, 0x44, 0x0A, 0x20, 0x20,
+	0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xFD,
+	0x00, 0x3B, 0x46, 0x1F, 0x8C, 0x3C, 0x00, 0x0A,
+	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x01, 0xA7,
+
+	0x02, 0x03, 0x2F, 0xD1, 0x51, 0x07, 0x16, 0x14,
+	0x05, 0x01, 0x03, 0x12, 0x13, 0x84, 0x22, 0x1F,
+	0x90, 0x5D, 0x5E, 0x5F, 0x60, 0x61, 0x23, 0x09,
+	0x07, 0x07, 0x83, 0x01, 0x00, 0x00, 0x67, 0x03,
+	0x0C, 0x00, 0x30, 0x00, 0x10, 0x44, 0xE3, 0x05,
+	0x03, 0x01, 0xE4, 0x0F, 0x00, 0x80, 0x01, 0x02,
+	0x3A, 0x80, 0x18, 0x71, 0x38, 0x2D, 0x40, 0x58,
+	0x2C, 0x45, 0x00, 0x20, 0xC2, 0x31, 0x00, 0x00,
+	0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1F,
+};
+
+static u8 edid_init_data_600M[] = {
+	0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+	0x49, 0x70, 0x88, 0x35, 0x01, 0x00, 0x00, 0x00,
+	0x2D, 0x1F, 0x01, 0x03, 0x80, 0x78, 0x44, 0x78,
+	0x0A, 0xCF, 0x74, 0xA3, 0x57, 0x4C, 0xB0, 0x23,
+	0x09, 0x48, 0x4C, 0x00, 0x00, 0x00, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x08, 0xE8,
+	0x00, 0x30, 0xF2, 0x70, 0x5A, 0x80, 0xB0, 0x58,
+	0x8A, 0x00, 0xC4, 0x8E, 0x21, 0x00, 0x00, 0x1E,
+	0x08, 0xE8, 0x00, 0x30, 0xF2, 0x70, 0x5A, 0x80,
+	0xB0, 0x58, 0x8A, 0x00, 0x20, 0xC2, 0x31, 0x00,
+	0x00, 0x1E, 0x00, 0x00, 0x00, 0xFC, 0x00, 0x52,
+	0x4B, 0x2D, 0x55, 0x48, 0x44, 0x0A, 0x20, 0x20,
+	0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xFD,
+	0x00, 0x3B, 0x46, 0x1F, 0x8C, 0x3C, 0x00, 0x0A,
+	0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x01, 0x39,
+
+	0x02, 0x03, 0x21, 0xD2, 0x41, 0x61, 0x23, 0x09,
+	0x07, 0x07, 0x83, 0x01, 0x00, 0x00, 0x66, 0x03,
+	0x0C, 0x00, 0x30, 0x00, 0x10, 0x67, 0xD8, 0x5D,
+	0xC4, 0x01, 0x78, 0xC0, 0x07, 0xE3, 0x05, 0x03,
+	0x01, 0x08, 0xE8, 0x00, 0x30, 0xF2, 0x70, 0x5A,
+	0x80, 0xB0, 0x58, 0x8A, 0x00, 0xC4, 0x8E, 0x21,
+	0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xE8,
+};
+
+static const struct v4l2_dv_timings cea640x480 = V4L2_DV_BT_CEA_640X480P59_94;
+
+static const struct v4l2_dv_timings_cap hdmirx_timings_cap = {
+	.type = V4L2_DV_BT_656_1120,
+	.reserved = { 0 },
+	V4L2_INIT_BT_TIMINGS(640, 4096,			/* min/max width */
+			     480, 2160,			/* min/max height */
+			     20000000, 600000000,	/* min/max pixelclock */
+			     /* standards */
+			     V4L2_DV_BT_STD_CEA861,
+			     /* capabilities */
+			     V4L2_DV_BT_CAP_PROGRESSIVE |
+			     V4L2_DV_BT_CAP_INTERLACED)
+};
+
+static const struct hdmirx_output_fmt g_out_fmts[] = {
+	{
+		.fourcc = V4L2_PIX_FMT_BGR24,
+		.cplanes = 1,
+		.mplanes = 1,
+		.bpp = { 24 },
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV24,
+		.cplanes = 2,
+		.mplanes = 1,
+		.bpp = { 8, 16 },
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV16,
+		.cplanes = 2,
+		.mplanes = 1,
+		.bpp = { 8, 16 },
+	}, {
+		.fourcc = V4L2_PIX_FMT_NV12,
+		.cplanes = 2,
+		.mplanes = 1,
+		.bpp = { 8, 16 },
+	}
+};
+
+static void hdmirx_writel(struct snps_hdmirx_dev *hdmirx_dev, int reg, u32 val)
+{
+	unsigned long lock_flags = 0;
+
+	spin_lock_irqsave(&hdmirx_dev->rst_lock, lock_flags);
+	writel(val, hdmirx_dev->regs + reg);
+	spin_unlock_irqrestore(&hdmirx_dev->rst_lock, lock_flags);
+}
+
+static u32 hdmirx_readl(struct snps_hdmirx_dev *hdmirx_dev, int reg)
+{
+	unsigned long lock_flags = 0;
+	u32 val;
+
+	spin_lock_irqsave(&hdmirx_dev->rst_lock, lock_flags);
+	val = readl(hdmirx_dev->regs + reg);
+	spin_unlock_irqrestore(&hdmirx_dev->rst_lock, lock_flags);
+	return val;
+}
+
+static void hdmirx_reset_dma(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	unsigned long lock_flags = 0;
+
+	spin_lock_irqsave(&hdmirx_dev->rst_lock, lock_flags);
+	reset_control_reset(hdmirx_dev->resets[0].rstc);
+	spin_unlock_irqrestore(&hdmirx_dev->rst_lock, lock_flags);
+}
+
+static void hdmirx_update_bits(struct snps_hdmirx_dev *hdmirx_dev, int reg,
+			       u32 mask, u32 data)
+{
+	unsigned long lock_flags = 0;
+	u32 val;
+
+	spin_lock_irqsave(&hdmirx_dev->rst_lock, lock_flags);
+	val = readl(hdmirx_dev->regs + reg) & ~mask;
+	val |= (data & mask);
+	writel(val, hdmirx_dev->regs + reg);
+	spin_unlock_irqrestore(&hdmirx_dev->rst_lock, lock_flags);
+}
+
+static int hdmirx_subscribe_event(struct v4l2_fh *fh,
+				  const struct v4l2_event_subscription *sub)
+{
+	switch (sub->type) {
+	case V4L2_EVENT_SOURCE_CHANGE:
+		if (fh->vdev->vfl_dir == VFL_DIR_RX)
+			return v4l2_src_change_event_subscribe(fh, sub);
+		break;
+	case V4L2_EVENT_CTRL:
+		return v4l2_ctrl_subscribe_event(fh, sub);
+	default:
+		return v4l2_ctrl_subscribe_event(fh, sub);
+	}
+
+	return -EINVAL;
+}
+
+static bool tx_5v_power_present(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	bool ret;
+	int val, i, cnt;
+
+	cnt = 0;
+	for (i = 0; i < 10; i++) {
+		usleep_range(1000, 1100);
+		val = gpiod_get_value(hdmirx_dev->detect_5v_gpio);
+		if (val > 0)
+			cnt++;
+		if (cnt >= DETECTION_THRESHOLD)
+			break;
+	}
+
+	ret = (cnt >= DETECTION_THRESHOLD) ? true : false;
+	v4l2_dbg(3, debug, &hdmirx_dev->v4l2_dev, "%s: %d\n", __func__, ret);
+
+	return ret;
+}
+
+static bool signal_not_lock(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	u32 mu_status, dma_st10, cmu_st;
+
+	mu_status = hdmirx_readl(hdmirx_dev, MAINUNIT_STATUS);
+	dma_st10 = hdmirx_readl(hdmirx_dev, DMA_STATUS10);
+	cmu_st = hdmirx_readl(hdmirx_dev, CMU_STATUS);
+
+	if ((mu_status & TMDSVALID_STABLE_ST) &&
+	    (dma_st10 & HDMIRX_LOCK) &&
+	    (cmu_st & TMDSQPCLK_LOCKED_ST))
+		return false;
+
+	return true;
+}
+
+static void hdmirx_get_colordepth(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 val, color_depth_reg;
+
+	val = hdmirx_readl(hdmirx_dev, DMA_STATUS11);
+	color_depth_reg = (val & HDMIRX_COLOR_DEPTH_MASK) >> 3;
+
+	switch (color_depth_reg) {
+	case 0x4:
+		hdmirx_dev->color_depth = 24;
+		break;
+	case 0x5:
+		hdmirx_dev->color_depth = 30;
+		break;
+	case 0x6:
+		hdmirx_dev->color_depth = 36;
+		break;
+	case 0x7:
+		hdmirx_dev->color_depth = 48;
+		break;
+	default:
+		hdmirx_dev->color_depth = 24;
+		break;
+	}
+
+	v4l2_dbg(1, debug, v4l2_dev, "%s: color_depth: %d, reg_val:%d\n",
+		 __func__, hdmirx_dev->color_depth, color_depth_reg);
+}
+
+static void hdmirx_get_pix_fmt(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 val;
+
+	val = hdmirx_readl(hdmirx_dev, DMA_STATUS11);
+	hdmirx_dev->pix_fmt = val & HDMIRX_FORMAT_MASK;
+
+	switch (hdmirx_dev->pix_fmt) {
+	case HDMIRX_RGB888:
+		hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_BGR24;
+		break;
+	case HDMIRX_YUV422:
+		hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_NV16;
+		break;
+	case HDMIRX_YUV444:
+		hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_NV24;
+		break;
+	case HDMIRX_YUV420:
+		hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_NV12;
+		break;
+	default:
+		v4l2_err(v4l2_dev,
+			 "%s: err pix_fmt: %d, set RGB888 as default\n",
+			 __func__, hdmirx_dev->pix_fmt);
+		hdmirx_dev->pix_fmt = HDMIRX_RGB888;
+		hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_BGR24;
+		break;
+	}
+
+	v4l2_dbg(1, debug, v4l2_dev, "%s: pix_fmt: %s\n", __func__,
+		 pix_fmt_str[hdmirx_dev->pix_fmt]);
+}
+
+static void hdmirx_get_timings(struct snps_hdmirx_dev *hdmirx_dev,
+			       struct v4l2_bt_timings *bt, bool from_dma)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 hact, vact, htotal, vtotal, fps;
+	u32 hfp, hs, hbp, vfp, vs, vbp;
+	u32 val;
+
+	if (from_dma) {
+		val = hdmirx_readl(hdmirx_dev, DMA_STATUS2);
+		hact = (val >> 16) & 0xffff;
+		vact = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, DMA_STATUS3);
+		htotal = (val >> 16) & 0xffff;
+		vtotal = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, DMA_STATUS4);
+		hs = (val >> 16) & 0xffff;
+		vs = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, DMA_STATUS5);
+		hbp = (val >> 16) & 0xffff;
+		vbp = val & 0xffff;
+		hfp = htotal - hact - hs - hbp;
+		vfp = vtotal - vact - vs - vbp;
+	} else {
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS1);
+		hs = (val >> 16) & 0xffff;
+		hfp = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS2);
+		hbp = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS3);
+		htotal = (val >> 16) & 0xffff;
+		hact = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS4);
+		vs = (val >> 16) & 0xffff;
+		vfp = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS5);
+		vbp = val & 0xffff;
+		val = hdmirx_readl(hdmirx_dev, VMON_STATUS6);
+		vtotal = (val >> 16) & 0xffff;
+		vact = val & 0xffff;
+		if (hdmirx_dev->pix_fmt == HDMIRX_YUV420)
+			hact *= 2;
+	}
+	if (hdmirx_dev->pix_fmt == HDMIRX_YUV420)
+		htotal *= 2;
+	fps = (bt->pixelclock + (htotal * vtotal) / 2) / (htotal * vtotal);
+	if (hdmirx_dev->pix_fmt == HDMIRX_YUV420)
+		fps *= 2;
+	bt->width = hact;
+	bt->height = vact;
+	bt->hfrontporch = hfp;
+	bt->hsync = hs;
+	bt->hbackporch = hbp;
+	bt->vfrontporch = vfp;
+	bt->vsync = vs;
+	bt->vbackporch = vbp;
+
+	v4l2_dbg(1, debug, v4l2_dev, "get timings from %s\n", from_dma ? "dma" : "ctrl");
+	v4l2_dbg(1, debug, v4l2_dev, "act:%ux%u, total:%ux%u, fps:%u, pixclk:%llu\n",
+		 bt->width, bt->height, htotal, vtotal, fps, bt->pixelclock);
+
+	v4l2_dbg(2, debug, v4l2_dev, "hfp:%u, hs:%u, hbp:%u, vfp:%u, vs:%u, vbp:%u\n",
+		 bt->hfrontporch, bt->hsync, bt->hbackporch,
+		 bt->vfrontporch, bt->vsync, bt->vbackporch);
+}
+
+static bool hdmirx_check_timing_valid(struct v4l2_bt_timings *bt)
+{
+	if (bt->width < 100 || bt->width > 5000 ||
+	    bt->height < 100 || bt->height > 5000)
+		return false;
+
+	if (!bt->hsync || bt->hsync > 200 ||
+	    !bt->vsync || bt->vsync > 100)
+		return false;
+
+	if (!bt->hbackporch || bt->hbackporch > 2000 ||
+	    !bt->vbackporch || bt->vbackporch > 2000)
+		return false;
+
+	if (!bt->hfrontporch || bt->hfrontporch > 2000 ||
+	    !bt->vfrontporch || bt->vfrontporch > 2000)
+		return false;
+
+	return true;
+}
+
+static int hdmirx_get_detected_timings(struct snps_hdmirx_dev *hdmirx_dev,
+				       struct v4l2_dv_timings *timings,
+				       bool from_dma)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct v4l2_bt_timings *bt = &timings->bt;
+	u32 field_type, color_depth, deframer_st;
+	u32 val, tmdsqpclk_freq, pix_clk;
+	u64 tmp_data, tmds_clk;
+
+	memset(timings, 0, sizeof(struct v4l2_dv_timings));
+	timings->type = V4L2_DV_BT_656_1120;
+
+	val = hdmirx_readl(hdmirx_dev, DMA_STATUS11);
+	field_type = (val & HDMIRX_TYPE_MASK) >> 7;
+	hdmirx_get_pix_fmt(hdmirx_dev);
+	bt->interlaced = field_type & BIT(0) ? V4L2_DV_INTERLACED : V4L2_DV_PROGRESSIVE;
+	val = hdmirx_readl(hdmirx_dev, PKTDEC_AVIIF_PB7_4);
+	hdmirx_dev->cur_vic =  val | VIC_VAL_MASK;
+	hdmirx_get_colordepth(hdmirx_dev);
+	color_depth = hdmirx_dev->color_depth;
+	deframer_st = hdmirx_readl(hdmirx_dev, DEFRAMER_STATUS);
+	hdmirx_dev->is_dvi_mode = deframer_st & OPMODE_STS_MASK ? false : true;
+	tmdsqpclk_freq = hdmirx_readl(hdmirx_dev, CMU_TMDSQPCLK_FREQ);
+	tmds_clk = tmdsqpclk_freq * 4 * 1000;
+	tmp_data = tmds_clk * 24;
+	do_div(tmp_data, color_depth);
+	pix_clk = tmp_data;
+	bt->pixelclock = pix_clk;
+
+	hdmirx_get_timings(hdmirx_dev, bt, from_dma);
+	if (bt->interlaced == V4L2_DV_INTERLACED) {
+		bt->height *= 2;
+		bt->il_vsync = bt->vsync + 1;
+	}
+
+	v4l2_dbg(2, debug, v4l2_dev, "tmds_clk:%llu\n", tmds_clk);
+	v4l2_dbg(1, debug, v4l2_dev, "interlace:%d, fmt:%d, vic:%d, color:%d, mode:%s\n",
+		 bt->interlaced, hdmirx_dev->pix_fmt,
+		 hdmirx_dev->cur_vic, hdmirx_dev->color_depth,
+		 hdmirx_dev->is_dvi_mode ? "dvi" : "hdmi");
+	v4l2_dbg(2, debug, v4l2_dev, "deframer_st:%#x\n", deframer_st);
+
+	if (!hdmirx_check_timing_valid(bt))
+		return -EINVAL;
+
+	return 0;
+}
+
+static bool port_no_link(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	return !tx_5v_power_present(hdmirx_dev);
+}
+
+static int hdmirx_query_dv_timings(struct file *file, void *_fh,
+				   struct v4l2_dv_timings *timings)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	int ret;
+
+	if (port_no_link(hdmirx_dev)) {
+		v4l2_err(v4l2_dev, "%s: port has no link\n", __func__);
+		return -ENOLINK;
+	}
+
+	if (signal_not_lock(hdmirx_dev)) {
+		v4l2_err(v4l2_dev, "%s: signal is not locked\n", __func__);
+		return -ENOLCK;
+	}
+
+	/*
+	 * query dv timings is during preview, dma's timing is stable,
+	 * so we can get from DMA. If the current resolution is negative,
+	 * get timing from CTRL need to change polarity of sync,
+	 * maybe cause DMA errors.
+	 */
+	ret = hdmirx_get_detected_timings(hdmirx_dev, timings, true);
+	if (ret)
+		return ret;
+
+	if (debug)
+		v4l2_print_dv_timings(hdmirx_dev->v4l2_dev.name,
+				      "query_dv_timings: ", timings, false);
+
+	if (!v4l2_valid_dv_timings(timings, &hdmirx_timings_cap, NULL, NULL)) {
+		v4l2_dbg(1, debug, v4l2_dev, "%s: timings out of range\n", __func__);
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static void hdmirx_hpd_ctrl(struct snps_hdmirx_dev *hdmirx_dev, bool en)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	v4l2_dbg(1, debug, v4l2_dev, "%s: %sable, hpd_trigger_level:%d\n",
+		 __func__, en ? "en" : "dis",
+		 hdmirx_dev->hpd_trigger_level);
+	hdmirx_update_bits(hdmirx_dev, SCDC_CONFIG, HPDLOW, en ? 0 : HPDLOW);
+	en = hdmirx_dev->hpd_trigger_level ? en : !en;
+	hdmirx_writel(hdmirx_dev, CORE_CONFIG, en);
+}
+
+static int hdmirx_write_edid(struct snps_hdmirx_dev *hdmirx_dev,
+			     struct v4l2_edid *edid, bool hpd_up)
+{
+	u32 edid_len = edid->blocks * EDID_BLOCK_SIZE;
+	char data[300];
+	u32 i;
+
+	memset(edid->reserved, 0, sizeof(edid->reserved));
+	if (edid->pad)
+		return -EINVAL;
+
+	if (edid->start_block)
+		return -EINVAL;
+
+	if (edid->blocks > EDID_NUM_BLOCKS_MAX) {
+		edid->blocks = EDID_NUM_BLOCKS_MAX;
+		return -E2BIG;
+	}
+
+	if (!edid->blocks) {
+		hdmirx_dev->edid_blocks_written = 0;
+		return 0;
+	}
+
+	memset(&hdmirx_dev->edid, 0, sizeof(hdmirx_dev->edid));
+	hdmirx_hpd_ctrl(hdmirx_dev, false);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG11,
+			   EDID_READ_EN_MASK |
+			   EDID_WRITE_EN_MASK |
+			   EDID_SLAVE_ADDR_MASK,
+			   EDID_READ_EN(0) |
+			   EDID_WRITE_EN(1) |
+			   EDID_SLAVE_ADDR(0x50));
+	for (i = 0; i < edid_len; i++)
+		hdmirx_writel(hdmirx_dev, DMA_CONFIG10, edid->edid[i]);
+
+	/* read out for debug */
+	if (debug >= 2) {
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG11,
+				   EDID_READ_EN_MASK |
+				   EDID_WRITE_EN_MASK,
+				   EDID_READ_EN(1) |
+				   EDID_WRITE_EN(0));
+		edid_len = edid_len > sizeof(data) ? sizeof(data) : edid_len;
+		memset(data, 0, sizeof(data));
+		for (i = 0; i < edid_len; i++)
+			data[i] = hdmirx_readl(hdmirx_dev, DMA_STATUS14);
+
+		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, data,
+			       edid_len, false);
+	}
+
+	/*
+	 * You must set EDID_READ_EN & EDID_WRITE_EN bit to 0,
+	 * when the read/write edid operation is completed.Otherwise, it
+	 * will affect the reading and writing of other registers
+	 */
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG11,
+			   EDID_READ_EN_MASK | EDID_WRITE_EN_MASK,
+			   EDID_READ_EN(0) | EDID_WRITE_EN(0));
+
+	hdmirx_dev->edid_blocks_written = edid->blocks;
+	memcpy(&hdmirx_dev->edid, edid->edid, edid->blocks * EDID_BLOCK_SIZE);
+	if (hpd_up) {
+		if (tx_5v_power_present(hdmirx_dev))
+			hdmirx_hpd_ctrl(hdmirx_dev, true);
+	}
+
+	return 0;
+}
+
+/*
+ * Before clearing interrupt, we need to read the interrupt status.
+ */
+static inline void hdmirx_clear_interrupt(struct snps_hdmirx_dev *hdmirx_dev,
+					  u32 reg, u32 val)
+{
+	/* (interrupt status register) = (interrupt clear register) - 0x8 */
+	hdmirx_readl(hdmirx_dev, reg - 0x8);
+	hdmirx_writel(hdmirx_dev, reg, val);
+}
+
+static void hdmirx_interrupts_setup(struct snps_hdmirx_dev *hdmirx_dev, bool en)
+{
+	v4l2_dbg(1, debug, &hdmirx_dev->v4l2_dev, "%s: %sable\n",
+		 __func__, en ? "en" : "dis");
+
+	/* Note: In DVI mode, it needs to be written twice to take effect. */
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, AVPUNIT_0_INT_CLEAR, 0xffffffff);
+
+	if (en) {
+		hdmirx_update_bits(hdmirx_dev, MAINUNIT_0_INT_MASK_N,
+				   TMDSQPCLK_OFF_CHG | TMDSQPCLK_LOCKED_CHG,
+				   TMDSQPCLK_OFF_CHG | TMDSQPCLK_LOCKED_CHG);
+		hdmirx_update_bits(hdmirx_dev, MAINUNIT_2_INT_MASK_N,
+				   TMDSVALID_STABLE_CHG, TMDSVALID_STABLE_CHG);
+		hdmirx_update_bits(hdmirx_dev, AVPUNIT_0_INT_MASK_N,
+				   CED_DYN_CNT_CH2_IRQ |
+				   CED_DYN_CNT_CH1_IRQ |
+				   CED_DYN_CNT_CH0_IRQ,
+				   CED_DYN_CNT_CH2_IRQ |
+				   CED_DYN_CNT_CH1_IRQ |
+				   CED_DYN_CNT_CH0_IRQ);
+	} else {
+		hdmirx_writel(hdmirx_dev, MAINUNIT_0_INT_MASK_N, 0);
+		hdmirx_writel(hdmirx_dev, MAINUNIT_2_INT_MASK_N, 0);
+		hdmirx_writel(hdmirx_dev, AVPUNIT_0_INT_MASK_N, 0);
+	}
+}
+
+static void hdmirx_plugout(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct arm_smccc_res res;
+
+	hdmirx_update_bits(hdmirx_dev, SCDC_CONFIG, POWERPROVIDED, 0);
+	hdmirx_interrupts_setup(hdmirx_dev, false);
+	hdmirx_hpd_ctrl(hdmirx_dev, false);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, HDMIRX_DMA_EN, 0);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG4,
+			   LINE_FLAG_INT_EN |
+			   HDMIRX_DMA_IDLE_INT |
+			   HDMIRX_LOCK_DISABLE_INT |
+			   LAST_FRAME_AXI_UNFINISH_INT_EN |
+			   FIFO_OVERFLOW_INT_EN |
+			   FIFO_UNDERFLOW_INT_EN |
+			   HDMIRX_AXI_ERROR_INT_EN, 0);
+	hdmirx_reset_dma(hdmirx_dev);
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, HDMI_DISABLE | PHY_RESET |
+			   PHY_PDDQ, HDMI_DISABLE);
+	hdmirx_writel(hdmirx_dev, PHYCREG_CONFIG0, 0x0);
+	cancel_delayed_work(&hdmirx_dev->delayed_work_res_change);
+	cancel_delayed_work_sync(&hdmirx_dev->delayed_work_heartbeat);
+	flush_work(&hdmirx_dev->work_wdt_config);
+	arm_smccc_smc(SIP_WDT_CFG, WDT_STOP, 0, 0, 0, 0, 0, 0, &res);
+}
+
+static int hdmirx_set_edid(struct file *file, void *fh, struct v4l2_edid *edid)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct arm_smccc_res res;
+	int ret;
+
+	disable_irq(hdmirx_dev->hdmi_irq);
+	disable_irq(hdmirx_dev->dma_irq);
+	arm_smccc_smc(RK_SIP_FIQ_CTRL, RK_SIP_FIQ_CTRL_FIQ_DIS,
+		      RK_IRQ_HDMIRX_HDMI, 0, 0, 0, 0, 0, &res);
+
+	if (tx_5v_power_present(hdmirx_dev))
+		hdmirx_plugout(hdmirx_dev);
+	ret = hdmirx_write_edid(hdmirx_dev, edid, false);
+	if (ret)
+		return ret;
+	hdmirx_dev->edid_version = HDMIRX_EDID_USER;
+
+	enable_irq(hdmirx_dev->hdmi_irq);
+	enable_irq(hdmirx_dev->dma_irq);
+	arm_smccc_smc(RK_SIP_FIQ_CTRL, RK_SIP_FIQ_CTRL_FIQ_EN,
+		      RK_IRQ_HDMIRX_HDMI, 0, 0, 0, 0, 0, &res);
+	queue_delayed_work(system_unbound_wq,
+			   &hdmirx_dev->delayed_work_hotplug,
+			   msecs_to_jiffies(500));
+	return 0;
+}
+
+static int hdmirx_get_edid(struct file *file, void *fh, struct v4l2_edid *edid)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	memset(edid->reserved, 0, sizeof(edid->reserved));
+
+	if (edid->pad)
+		return -EINVAL;
+
+	if (!edid->start_block && !edid->blocks) {
+		edid->blocks = hdmirx_dev->edid_blocks_written;
+		return 0;
+	}
+
+	if (!hdmirx_dev->edid_blocks_written)
+		return -ENODATA;
+
+	if (edid->start_block >= hdmirx_dev->edid_blocks_written || !edid->blocks)
+		return -EINVAL;
+
+	if (edid->start_block + edid->blocks > hdmirx_dev->edid_blocks_written)
+		edid->blocks = hdmirx_dev->edid_blocks_written - edid->start_block;
+
+	memcpy(edid->edid, &hdmirx_dev->edid, edid->blocks * EDID_BLOCK_SIZE);
+
+	v4l2_dbg(1, debug, v4l2_dev, "%s: read EDID:\n", __func__);
+	if (debug > 0)
+		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
+			       edid->edid, edid->blocks * EDID_BLOCK_SIZE, false);
+
+	return 0;
+}
+
+static int hdmirx_g_parm(struct file *file, void *priv,
+			 struct v4l2_streamparm *parm)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_fract fps;
+
+	if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
+		return -EINVAL;
+
+	fps = v4l2_calc_timeperframe(&hdmirx_dev->timings);
+	parm->parm.capture.timeperframe.numerator = fps.numerator;
+	parm->parm.capture.timeperframe.denominator = fps.denominator;
+
+	return 0;
+}
+
+static int hdmirx_dv_timings_cap(struct file *file, void *fh,
+				 struct v4l2_dv_timings_cap *cap)
+{
+	*cap = hdmirx_timings_cap;
+	return 0;
+}
+
+static int hdmirx_enum_dv_timings(struct file *file, void *_fh,
+				  struct v4l2_enum_dv_timings *timings)
+{
+	return v4l2_enum_dv_timings_cap(timings, &hdmirx_timings_cap, NULL, NULL);
+}
+
+static void hdmirx_scdc_init(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	hdmirx_update_bits(hdmirx_dev, I2C_SLAVE_CONFIG1,
+			   I2C_SDA_OUT_HOLD_VALUE_QST_MASK |
+			   I2C_SDA_IN_HOLD_VALUE_QST_MASK,
+			   I2C_SDA_OUT_HOLD_VALUE_QST(0x80) |
+			   I2C_SDA_IN_HOLD_VALUE_QST(0x15));
+	hdmirx_update_bits(hdmirx_dev, SCDC_REGBANK_CONFIG0,
+			   SCDC_SINKVERSION_QST_MASK,
+			   SCDC_SINKVERSION_QST(1));
+}
+
+static int wait_reg_bit_status(struct snps_hdmirx_dev *hdmirx_dev, u32 reg,
+			       u32 bit_mask, u32 expect_val, bool is_grf,
+			       u32 ms)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 i, val;
+
+	for (i = 0; i < ms; i++) {
+		if (is_grf)
+			regmap_read(hdmirx_dev->grf, reg, &val);
+		else
+			val = hdmirx_readl(hdmirx_dev, reg);
+
+		if ((val & bit_mask) == expect_val) {
+			v4l2_dbg(2, debug, v4l2_dev,
+				 "%s:  i:%d, time: %dms\n", __func__, i, ms);
+			break;
+		}
+		usleep_range(1000, 1010);
+	}
+
+	if (i == ms)
+		return -1;
+
+	return 0;
+}
+
+static int hdmirx_phy_register_write(struct snps_hdmirx_dev *hdmirx_dev,
+				     u32 phy_reg, u32 val)
+{
+	struct device *dev = hdmirx_dev->dev;
+
+	reinit_completion(&hdmirx_dev->cr_write_done);
+	/* clear irq status */
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_2_INT_CLEAR, 0xffffffff);
+	/* en irq */
+	hdmirx_update_bits(hdmirx_dev, MAINUNIT_2_INT_MASK_N,
+			   PHYCREG_CR_WRITE_DONE, PHYCREG_CR_WRITE_DONE);
+	/* write phy reg addr */
+	hdmirx_writel(hdmirx_dev, PHYCREG_CONFIG1, phy_reg);
+	/* write phy reg val */
+	hdmirx_writel(hdmirx_dev, PHYCREG_CONFIG2, val);
+	/* config write enable */
+	hdmirx_writel(hdmirx_dev, PHYCREG_CONTROL, PHYCREG_CR_PARA_WRITE_P);
+
+	if (!wait_for_completion_timeout(&hdmirx_dev->cr_write_done,
+					 msecs_to_jiffies(20))) {
+		dev_err(dev, "%s wait cr write done failed\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+static void hdmirx_tmds_clk_ratio_config(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 val;
+
+	val = hdmirx_readl(hdmirx_dev, SCDC_REGBANK_STATUS1);
+	v4l2_dbg(3, debug, v4l2_dev, "%s: scdc_regbank_st:%#x\n", __func__, val);
+	hdmirx_dev->tmds_clk_ratio = (val & SCDC_TMDSBITCLKRATIO) > 0;
+
+	if (hdmirx_dev->tmds_clk_ratio) {
+		v4l2_dbg(3, debug, v4l2_dev, "%s: HDMITX greater than 3.4Gbps\n", __func__);
+		hdmirx_update_bits(hdmirx_dev, PHY_CONFIG,
+				   TMDS_CLOCK_RATIO, TMDS_CLOCK_RATIO);
+	} else {
+		v4l2_dbg(3, debug, v4l2_dev, "%s: HDMITX less than 3.4Gbps\n", __func__);
+		hdmirx_update_bits(hdmirx_dev, PHY_CONFIG,
+				   TMDS_CLOCK_RATIO, 0);
+	}
+}
+
+static void hdmirx_phy_config(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct device *dev = hdmirx_dev->dev;
+
+	hdmirx_clear_interrupt(hdmirx_dev, SCDC_INT_CLEAR, 0xffffffff);
+	hdmirx_update_bits(hdmirx_dev, SCDC_INT_MASK_N, SCDCTMDSCCFG_CHG,
+			   SCDCTMDSCCFG_CHG);
+	/* cr_para_clk 24M */
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, REFFREQ_SEL_MASK, REFFREQ_SEL(0));
+	/* rx data width 40bit valid */
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, RXDATA_WIDTH, RXDATA_WIDTH);
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, PHY_RESET, PHY_RESET);
+	usleep_range(100, 110);
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, PHY_RESET, 0);
+	usleep_range(100, 110);
+	/* select cr para interface */
+	hdmirx_writel(hdmirx_dev, PHYCREG_CONFIG0, 0x3);
+
+	if (wait_reg_bit_status(hdmirx_dev, SYS_GRF_SOC_STATUS1,
+				HDMIRXPHY_SRAM_INIT_DONE,
+				HDMIRXPHY_SRAM_INIT_DONE, true, 10))
+		dev_err(dev, "%s: phy SRAM init failed\n", __func__);
+
+	regmap_write(hdmirx_dev->grf, SYS_GRF_SOC_CON1,
+		     (HDMIRXPHY_SRAM_EXT_LD_DONE << 16) |
+		     HDMIRXPHY_SRAM_EXT_LD_DONE);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 2);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 3);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 2);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 2);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 3);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 2);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 0);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 1);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 0);
+	hdmirx_phy_register_write(hdmirx_dev, SUP_DIG_ANA_CREGS_SUP_ANA_NC, 0);
+
+	hdmirx_phy_register_write(hdmirx_dev,
+				  HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_3_REG,
+				  CDR_SETTING_BOUNDARY_3_DEFAULT);
+	hdmirx_phy_register_write(hdmirx_dev,
+				  HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_4_REG,
+				  CDR_SETTING_BOUNDARY_4_DEFAULT);
+	hdmirx_phy_register_write(hdmirx_dev,
+				  HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_5_REG,
+				  CDR_SETTING_BOUNDARY_5_DEFAULT);
+	hdmirx_phy_register_write(hdmirx_dev,
+				  HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_6_REG,
+				  CDR_SETTING_BOUNDARY_6_DEFAULT);
+	hdmirx_phy_register_write(hdmirx_dev,
+				  HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_7_REG,
+				  CDR_SETTING_BOUNDARY_7_DEFAULT);
+
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, PHY_PDDQ, 0);
+	if (wait_reg_bit_status(hdmirx_dev, PHY_STATUS, PDDQ_ACK, 0, false, 10))
+		dev_err(dev, "%s: wait pddq ack failed\n", __func__);
+
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, HDMI_DISABLE, 0);
+	if (wait_reg_bit_status(hdmirx_dev, PHY_STATUS, HDMI_DISABLE_ACK, 0,
+				false, 50))
+		dev_err(dev, "%s: wait hdmi disable ack failed\n", __func__);
+
+	hdmirx_tmds_clk_ratio_config(hdmirx_dev);
+}
+
+static void hdmirx_controller_init(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct device *dev = hdmirx_dev->dev;
+
+	reinit_completion(&hdmirx_dev->timer_base_lock);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_0_INT_CLEAR, 0xffffffff);
+	/* en irq */
+	hdmirx_update_bits(hdmirx_dev, MAINUNIT_0_INT_MASK_N,
+			   TIMER_BASE_LOCKED_IRQ, TIMER_BASE_LOCKED_IRQ);
+	/* write irefclk freq */
+	hdmirx_writel(hdmirx_dev, GLOBAL_TIMER_REF_BASE, IREF_CLK_FREQ_HZ);
+
+	if (!wait_for_completion_timeout(&hdmirx_dev->timer_base_lock,
+					 msecs_to_jiffies(20)))
+		dev_err(dev, "%s wait timer base lock failed\n", __func__);
+
+	hdmirx_update_bits(hdmirx_dev, CMU_CONFIG0,
+			   TMDSQPCLK_STABLE_FREQ_MARGIN_MASK |
+			   AUDCLK_STABLE_FREQ_MARGIN_MASK,
+			   TMDSQPCLK_STABLE_FREQ_MARGIN(2) |
+			   AUDCLK_STABLE_FREQ_MARGIN(1));
+	hdmirx_update_bits(hdmirx_dev, DESCRAND_EN_CONTROL,
+			   SCRAMB_EN_SEL_QST_MASK, SCRAMB_EN_SEL_QST(1));
+	hdmirx_update_bits(hdmirx_dev, CED_CONFIG,
+			   CED_VIDDATACHECKEN_QST |
+			   CED_DATAISCHECKEN_QST |
+			   CED_GBCHECKEN_QST |
+			   CED_CTRLCHECKEN_QST |
+			   CED_CHLOCKMAXER_QST_MASK,
+			   CED_VIDDATACHECKEN_QST |
+			   CED_GBCHECKEN_QST |
+			   CED_CTRLCHECKEN_QST |
+			   CED_CHLOCKMAXER_QST(0x10));
+	hdmirx_update_bits(hdmirx_dev, DEFRAMER_CONFIG0,
+			   VS_REMAPFILTER_EN_QST | VS_FILTER_ORDER_QST_MASK,
+			   VS_REMAPFILTER_EN_QST | VS_FILTER_ORDER_QST(0x3));
+}
+
+static void hdmirx_set_negative_pol(struct snps_hdmirx_dev *hdmirx_dev, bool en)
+{
+	if (en) {
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6,
+				   VSYNC_TOGGLE_EN | HSYNC_TOGGLE_EN,
+				   VSYNC_TOGGLE_EN | HSYNC_TOGGLE_EN);
+		hdmirx_update_bits(hdmirx_dev, VIDEO_CONFIG2,
+				   VPROC_VSYNC_POL_OVR_VALUE |
+				   VPROC_VSYNC_POL_OVR_EN |
+				   VPROC_HSYNC_POL_OVR_VALUE |
+				   VPROC_HSYNC_POL_OVR_EN,
+				   VPROC_VSYNC_POL_OVR_EN |
+				   VPROC_HSYNC_POL_OVR_EN);
+		return;
+	}
+
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6,
+			   VSYNC_TOGGLE_EN | HSYNC_TOGGLE_EN, 0);
+
+	hdmirx_update_bits(hdmirx_dev, VIDEO_CONFIG2,
+			   VPROC_VSYNC_POL_OVR_VALUE |
+			   VPROC_VSYNC_POL_OVR_EN |
+			   VPROC_HSYNC_POL_OVR_VALUE |
+			   VPROC_HSYNC_POL_OVR_EN, 0);
+}
+
+static int hdmirx_try_to_get_timings(struct snps_hdmirx_dev *hdmirx_dev,
+				     struct v4l2_dv_timings *timings,
+				     int try_cnt)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	int i, cnt = 0, fail_cnt = 0, ret = 0;
+	bool from_dma = false;
+
+	hdmirx_set_negative_pol(hdmirx_dev, false);
+	for (i = 0; i < try_cnt; i++) {
+		ret = hdmirx_get_detected_timings(hdmirx_dev, timings, from_dma);
+		if (ret) {
+			cnt = 0;
+			fail_cnt++;
+			if (fail_cnt > 3) {
+				hdmirx_set_negative_pol(hdmirx_dev, true);
+				from_dma = true;
+			}
+		} else {
+			cnt++;
+		}
+		if (cnt >= 5)
+			break;
+
+		usleep_range(10 * 1000, 10 * 1100);
+	}
+
+	if (try_cnt > 8 && cnt < 5)
+		v4l2_dbg(1, debug, v4l2_dev, "%s: res not stable\n", __func__);
+
+	return ret;
+}
+
+static void hdmirx_format_change(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_dv_timings timings;
+	struct hdmirx_stream *stream = &hdmirx_dev->stream;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	const struct v4l2_event ev_src_chg = {
+		.type = V4L2_EVENT_SOURCE_CHANGE,
+		.u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION,
+	};
+
+	if (hdmirx_try_to_get_timings(hdmirx_dev, &timings, 20)) {
+		queue_delayed_work(system_unbound_wq,
+				   &hdmirx_dev->delayed_work_hotplug,
+				   msecs_to_jiffies(20));
+		return;
+	}
+
+	if (!v4l2_match_dv_timings(&hdmirx_dev->timings, &timings, 0, false)) {
+		/* automatically set timing rather than set by userspace */
+		hdmirx_dev->timings = timings;
+		v4l2_print_dv_timings(hdmirx_dev->v4l2_dev.name,
+				      "New format: ", &timings, false);
+	}
+
+	hdmirx_dev->got_timing = true;
+	v4l2_dbg(1, debug, v4l2_dev, "%s: queue res_chg_event\n", __func__);
+	v4l2_event_queue(&stream->vdev, &ev_src_chg);
+}
+
+static void hdmirx_set_ddr_store_fmt(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	enum ddr_store_fmt store_fmt;
+	u32 dma_cfg1;
+
+	switch (hdmirx_dev->pix_fmt) {
+	case HDMIRX_RGB888:
+		store_fmt = STORE_RGB888;
+		break;
+	case HDMIRX_YUV444:
+		store_fmt = STORE_YUV444_8BIT;
+		break;
+	case HDMIRX_YUV422:
+		store_fmt = STORE_YUV422_8BIT;
+		break;
+	case HDMIRX_YUV420:
+		store_fmt = STORE_YUV420_8BIT;
+		break;
+	default:
+		store_fmt = STORE_RGB888;
+		break;
+	}
+
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG1,
+			   DDR_STORE_FORMAT_MASK, DDR_STORE_FORMAT(store_fmt));
+	dma_cfg1 = hdmirx_readl(hdmirx_dev, DMA_CONFIG1);
+	v4l2_dbg(1, debug, v4l2_dev, "%s: pix_fmt: %s, DMA_CONFIG1:%#x\n",
+		 __func__, pix_fmt_str[hdmirx_dev->pix_fmt], dma_cfg1);
+}
+
+static int hdmirx_wait_lock_and_get_timing(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 mu_status, scdc_status, dma_st10, cmu_st;
+	u32 i;
+
+	for (i = 0; i < 300; i++) {
+		mu_status = hdmirx_readl(hdmirx_dev, MAINUNIT_STATUS);
+		scdc_status = hdmirx_readl(hdmirx_dev, SCDC_REGBANK_STATUS3);
+		dma_st10 = hdmirx_readl(hdmirx_dev, DMA_STATUS10);
+		cmu_st = hdmirx_readl(hdmirx_dev, CMU_STATUS);
+
+		if ((mu_status & TMDSVALID_STABLE_ST) &&
+		    (dma_st10 & HDMIRX_LOCK) &&
+		    (cmu_st & TMDSQPCLK_LOCKED_ST))
+			break;
+
+		if (!tx_5v_power_present(hdmirx_dev)) {
+			v4l2_err(v4l2_dev, "%s: HDMI pull out, return\n", __func__);
+			return -1;
+		}
+
+		hdmirx_tmds_clk_ratio_config(hdmirx_dev);
+	}
+
+	if (i == 300) {
+		v4l2_err(v4l2_dev, "%s: signal not lock, tmds_clk_ratio:%d\n",
+			 __func__, hdmirx_dev->tmds_clk_ratio);
+		v4l2_err(v4l2_dev, "%s: mu_st:%#x, scdc_st:%#x, dma_st10:%#x\n",
+			 __func__, mu_status, scdc_status, dma_st10);
+		return -1;
+	}
+
+	v4l2_info(v4l2_dev, "%s: signal lock ok, i:%d\n", __func__, i);
+	hdmirx_writel(hdmirx_dev, GLOBAL_SWRESET_REQUEST, DATAPATH_SWRESETREQ);
+
+	reinit_completion(&hdmirx_dev->avi_pkt_rcv);
+	hdmirx_clear_interrupt(hdmirx_dev, PKT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_update_bits(hdmirx_dev, PKT_2_INT_MASK_N,
+			   PKTDEC_AVIIF_RCV_IRQ, PKTDEC_AVIIF_RCV_IRQ);
+
+	if (!wait_for_completion_timeout(&hdmirx_dev->avi_pkt_rcv,
+					 msecs_to_jiffies(300))) {
+		v4l2_err(v4l2_dev, "%s wait avi_pkt_rcv failed\n", __func__);
+		hdmirx_update_bits(hdmirx_dev, PKT_2_INT_MASK_N,
+				   PKTDEC_AVIIF_RCV_IRQ, 0);
+	}
+
+	usleep_range(50 * 1000, 50 * 1010);
+	hdmirx_format_change(hdmirx_dev);
+
+	return 0;
+}
+
+static void hdmirx_dma_config(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	hdmirx_set_ddr_store_fmt(hdmirx_dev);
+
+	/* Note: uv_swap, rb can not swap, doc err*/
+	if (hdmirx_dev->cur_fmt_fourcc != V4L2_PIX_FMT_NV16)
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, RB_SWAP_EN, RB_SWAP_EN);
+	else
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, RB_SWAP_EN, 0);
+
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG7,
+			   LOCK_FRAME_NUM_MASK,
+			   LOCK_FRAME_NUM(2));
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG1,
+			   UV_WID_MASK | Y_WID_MASK | ABANDON_EN,
+			   UV_WID(1) | Y_WID(2) | ABANDON_EN);
+}
+
+static void hdmirx_submodule_init(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	/* Note: if not config HDCP2_CONFIG, there will be some errors; */
+	hdmirx_update_bits(hdmirx_dev, HDCP2_CONFIG,
+			   HDCP2_SWITCH_OVR_VALUE |
+			   HDCP2_SWITCH_OVR_EN,
+			   HDCP2_SWITCH_OVR_EN);
+	hdmirx_scdc_init(hdmirx_dev);
+	hdmirx_controller_init(hdmirx_dev);
+}
+
+static int hdmirx_enum_input(struct file *file, void *priv,
+			     struct v4l2_input *input)
+{
+	if (input->index > 0)
+		return -EINVAL;
+
+	input->type = V4L2_INPUT_TYPE_CAMERA;
+	input->std = 0;
+	strscpy(input->name, "hdmirx", sizeof(input->name));
+	input->capabilities = V4L2_IN_CAP_DV_TIMINGS;
+
+	return 0;
+}
+
+static int hdmirx_get_input(struct file *file, void *priv, unsigned int *i)
+{
+	*i = 0;
+	return 0;
+}
+
+static int hdmirx_set_input(struct file *file, void *priv, unsigned int i)
+{
+	if (i)
+		return -EINVAL;
+	return 0;
+}
+
+static int fcc_xysubs(u32 fcc, u32 *xsubs, u32 *ysubs)
+{
+	/* Note: cbcr plane bpp is 16 bit */
+	switch (fcc) {
+	case V4L2_PIX_FMT_NV24:
+		*xsubs = 1;
+		*ysubs = 1;
+		break;
+	case V4L2_PIX_FMT_NV16:
+		*xsubs = 2;
+		*ysubs = 1;
+		break;
+	case V4L2_PIX_FMT_NV12:
+		*xsubs = 2;
+		*ysubs = 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u32 hdmirx_align_bits_per_pixel(const struct hdmirx_output_fmt *fmt,
+				       int plane_index)
+{
+	u32 bpp = 0;
+
+	if (fmt) {
+		switch (fmt->fourcc) {
+		case V4L2_PIX_FMT_NV24:
+		case V4L2_PIX_FMT_NV16:
+		case V4L2_PIX_FMT_NV12:
+		case V4L2_PIX_FMT_BGR24:
+			bpp = fmt->bpp[plane_index];
+			break;
+		default:
+			pr_err("fourcc: %#x is not supported\n", fmt->fourcc);
+			break;
+		}
+	}
+
+	return bpp;
+}
+
+static const struct hdmirx_output_fmt *find_output_fmt(u32 pixelfmt)
+{
+	const struct hdmirx_output_fmt *fmt;
+	u32 i;
+
+	for (i = 0; i < ARRAY_SIZE(g_out_fmts); i++) {
+		fmt = &g_out_fmts[i];
+		if (fmt->fourcc == pixelfmt)
+			return fmt;
+	}
+
+	return NULL;
+}
+
+static void hdmirx_set_fmt(struct hdmirx_stream *stream,
+			   struct v4l2_pix_format_mplane *pixm, bool try)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct v4l2_bt_timings *bt = &hdmirx_dev->timings.bt;
+	const struct hdmirx_output_fmt *fmt;
+	unsigned int imagesize = 0, planes;
+	u32 xsubs = 1, ysubs = 1, i;
+
+	memset(&pixm->plane_fmt[0], 0, sizeof(struct v4l2_plane_pix_format));
+	fmt = find_output_fmt(pixm->pixelformat);
+	if (!fmt) {
+		fmt = &g_out_fmts[0];
+		v4l2_err(v4l2_dev,
+			 "%s: set_fmt:%#x not supported, use def_fmt:%x\n",
+			 __func__, pixm->pixelformat, fmt->fourcc);
+	}
+
+	if (!bt->width || !bt->height)
+		v4l2_err(v4l2_dev, "%s: invalid resolution:%#xx%#x\n",
+			 __func__, bt->width, bt->height);
+
+	pixm->pixelformat = fmt->fourcc;
+	pixm->width = bt->width;
+	pixm->height = bt->height;
+	pixm->num_planes = fmt->mplanes;
+	pixm->quantization = V4L2_QUANTIZATION_DEFAULT;
+	pixm->colorspace = V4L2_COLORSPACE_SRGB;
+	pixm->ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT;
+
+	if (bt->interlaced == V4L2_DV_INTERLACED)
+		pixm->field = V4L2_FIELD_INTERLACED_TB;
+	else
+		pixm->field = V4L2_FIELD_NONE;
+
+	memset(pixm->reserved, 0, sizeof(pixm->reserved));
+
+	/* calculate plane size and image size */
+	fcc_xysubs(fmt->fourcc, &xsubs, &ysubs);
+	planes = fmt->cplanes ? fmt->cplanes : fmt->mplanes;
+
+	for (i = 0; i < planes; i++) {
+		struct v4l2_plane_pix_format *plane_fmt;
+		int width, height, bpl, size, bpp;
+
+		if (!i) {
+			width = pixm->width;
+			height = pixm->height;
+		} else {
+			width = pixm->width / xsubs;
+			height = pixm->height / ysubs;
+		}
+
+		bpp = hdmirx_align_bits_per_pixel(fmt, i);
+		bpl = ALIGN(width * bpp / HDMIRX_STORED_BIT_WIDTH,
+			    MEMORY_ALIGN_ROUND_UP_BYTES);
+		size = bpl * height;
+		imagesize += size;
+
+		if (fmt->mplanes > i) {
+			/* Set bpl and size for each mplane */
+			plane_fmt = pixm->plane_fmt + i;
+			plane_fmt->bytesperline = bpl;
+			plane_fmt->sizeimage = size;
+		}
+
+		v4l2_dbg(1, debug, v4l2_dev,
+			 "C-Plane %i size: %d, Total imagesize: %d\n",
+			 i, size, imagesize);
+	}
+
+	/* convert to non-MPLANE format.
+	 * It's important since we want to unify non-MPLANE and MPLANE.
+	 */
+	if (fmt->mplanes == 1)
+		pixm->plane_fmt[0].sizeimage = imagesize;
+
+	if (!try) {
+		stream->out_fmt = fmt;
+		stream->pixm = *pixm;
+
+		v4l2_dbg(1, debug, v4l2_dev,
+			 "%s: req(%d, %d), out(%d, %d), fmt:%#x\n", __func__,
+			 pixm->width, pixm->height, stream->pixm.width,
+			 stream->pixm.height, fmt->fourcc);
+	}
+}
+
+static int hdmirx_enum_fmt_vid_cap_mplane(struct file *file, void *priv,
+					  struct v4l2_fmtdesc *f)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+
+	if (f->index >= 1)
+		return -EINVAL;
+
+	f->pixelformat = hdmirx_dev->cur_fmt_fourcc;
+
+	return 0;
+}
+
+static int hdmirx_s_fmt_vid_cap_mplane(struct file *file,
+				       void *priv, struct v4l2_format *f)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	if (vb2_is_busy(&stream->buf_queue)) {
+		v4l2_err(v4l2_dev, "%s: queue busy\n", __func__);
+		return -EBUSY;
+	}
+
+	hdmirx_set_fmt(stream, &f->fmt.pix_mp, false);
+
+	return 0;
+}
+
+static int hdmirx_g_fmt_vid_cap_mplane(struct file *file, void *fh,
+				       struct v4l2_format *f)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_pix_format_mplane pixm = {};
+
+	pixm.pixelformat = hdmirx_dev->cur_fmt_fourcc;
+	hdmirx_set_fmt(stream, &pixm, true);
+	f->fmt.pix_mp = pixm;
+
+	return 0;
+}
+
+static int hdmirx_g_dv_timings(struct file *file, void *_fh,
+			       struct v4l2_dv_timings *timings)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 dma_cfg1;
+
+	*timings = hdmirx_dev->timings;
+	dma_cfg1 = hdmirx_readl(hdmirx_dev, DMA_CONFIG1);
+	v4l2_dbg(1, debug, v4l2_dev, "%s: pix_fmt: %s, DMA_CONFIG1:%#x\n",
+		 __func__, pix_fmt_str[hdmirx_dev->pix_fmt], dma_cfg1);
+
+	return 0;
+}
+
+static int hdmirx_s_dv_timings(struct file *file, void *_fh,
+			       struct v4l2_dv_timings *timings)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	if (!timings)
+		return -EINVAL;
+
+	if (debug)
+		v4l2_print_dv_timings(hdmirx_dev->v4l2_dev.name,
+				      "s_dv_timings: ", timings, false);
+
+	if (!v4l2_valid_dv_timings(timings, &hdmirx_timings_cap, NULL, NULL)) {
+		v4l2_dbg(1, debug, v4l2_dev,
+			 "%s: timings out of range\n", __func__);
+		return -ERANGE;
+	}
+
+	/* Check if the timings are part of the CEA-861 timings. */
+	v4l2_find_dv_timings_cap(timings, &hdmirx_timings_cap, 0, NULL, NULL);
+
+	if (v4l2_match_dv_timings(&hdmirx_dev->timings, timings, 0, false)) {
+		v4l2_dbg(1, debug, v4l2_dev, "%s: no change\n", __func__);
+		return 0;
+	}
+
+	/*
+	 * Changing the timings implies a format change, which is not allowed
+	 * while buffers for use with streaming have already been allocated.
+	 */
+	if (vb2_is_busy(&stream->buf_queue))
+		return -EBUSY;
+
+	hdmirx_dev->timings = *timings;
+	/* Update the internal format */
+	hdmirx_set_fmt(stream, &stream->pixm, false);
+
+	return 0;
+}
+
+static int hdmirx_querycap(struct file *file, void *priv,
+			   struct v4l2_capability *cap)
+{
+	struct hdmirx_stream *stream = video_drvdata(file);
+	struct device *dev = stream->hdmirx_dev->dev;
+
+	strscpy(cap->driver, dev->driver->name, sizeof(cap->driver));
+	strscpy(cap->card, dev->driver->name, sizeof(cap->card));
+	snprintf(cap->bus_info, sizeof(cap->bus_info), "platform: snps_hdmirx");
+
+	return 0;
+}
+
+static int hdmirx_queue_setup(struct vb2_queue *queue,
+			      unsigned int *num_buffers,
+			      unsigned int *num_planes,
+			      unsigned int sizes[],
+			      struct device *alloc_ctxs[])
+{
+	struct hdmirx_stream *stream = vb2_get_drv_priv(queue);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	const struct v4l2_pix_format_mplane *pixm = NULL;
+	const struct hdmirx_output_fmt *out_fmt;
+	u32 i, height;
+
+	pixm = &stream->pixm;
+	out_fmt = stream->out_fmt;
+
+	if (!num_planes || !out_fmt) {
+		v4l2_err(v4l2_dev, "%s: out_fmt not set\n", __func__);
+		return -EINVAL;
+	}
+
+	if (*num_planes) {
+		if (*num_planes != pixm->num_planes)
+			return -EINVAL;
+
+		for (i = 0; i < *num_planes; i++)
+			if (sizes[i] < pixm->plane_fmt[i].sizeimage)
+				return -EINVAL;
+	}
+
+	*num_planes = out_fmt->mplanes;
+	height = pixm->height;
+
+	for (i = 0; i < out_fmt->mplanes; i++) {
+		const struct v4l2_plane_pix_format *plane_fmt;
+		int h = height;
+
+		plane_fmt = &pixm->plane_fmt[i];
+		sizes[i] = plane_fmt->sizeimage / height * h;
+	}
+
+	v4l2_dbg(1, debug, v4l2_dev, "%s: count %d, size %d\n",
+		 v4l2_type_names[queue->type], *num_buffers, sizes[0]);
+
+	return 0;
+}
+
+/*
+ * The vb2_buffer are stored in hdmirx_buffer, in order to unify
+ * mplane buffer and none-mplane buffer.
+ */
+static void hdmirx_buf_queue(struct vb2_buffer *vb)
+{
+	const struct hdmirx_output_fmt *out_fmt;
+	struct vb2_v4l2_buffer *vbuf;
+	struct hdmirx_buffer *hdmirx_buf;
+	struct vb2_queue *queue;
+	struct hdmirx_stream *stream;
+	const struct v4l2_pix_format_mplane *pixm;
+	unsigned long lock_flags = 0;
+	int i;
+
+	vbuf = to_vb2_v4l2_buffer(vb);
+	hdmirx_buf = container_of(vbuf, struct hdmirx_buffer, vb);
+	queue = vb->vb2_queue;
+	stream = vb2_get_drv_priv(queue);
+	pixm = &stream->pixm;
+	out_fmt = stream->out_fmt;
+
+	memset(hdmirx_buf->buff_addr, 0, sizeof(hdmirx_buf->buff_addr));
+	/*
+	 * If mplanes > 1, every c-plane has its own m-plane,
+	 * otherwise, multiple c-planes are in the same m-plane
+	 */
+	for (i = 0; i < out_fmt->mplanes; i++)
+		hdmirx_buf->buff_addr[i] = vb2_dma_contig_plane_dma_addr(vb, i);
+
+	if (out_fmt->mplanes == 1) {
+		if (out_fmt->cplanes == 1) {
+			hdmirx_buf->buff_addr[HDMIRX_PLANE_CBCR] =
+				hdmirx_buf->buff_addr[HDMIRX_PLANE_Y];
+		} else {
+			for (i = 0; i < out_fmt->cplanes - 1; i++)
+				hdmirx_buf->buff_addr[i + 1] =
+					hdmirx_buf->buff_addr[i] +
+					pixm->plane_fmt[i].bytesperline *
+					pixm->height;
+		}
+	}
+
+	spin_lock_irqsave(&stream->vbq_lock, lock_flags);
+	list_add_tail(&hdmirx_buf->queue, &stream->buf_head);
+	spin_unlock_irqrestore(&stream->vbq_lock, lock_flags);
+}
+
+static void return_all_buffers(struct hdmirx_stream *stream,
+			       enum vb2_buffer_state state)
+{
+	struct hdmirx_buffer *buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&stream->vbq_lock, flags);
+	if (stream->curr_buf)
+		list_add_tail(&stream->curr_buf->queue, &stream->buf_head);
+	if (stream->next_buf && stream->next_buf != stream->curr_buf)
+		list_add_tail(&stream->next_buf->queue, &stream->buf_head);
+	stream->curr_buf = NULL;
+	stream->next_buf = NULL;
+
+	while (!list_empty(&stream->buf_head)) {
+		buf = list_first_entry(&stream->buf_head,
+				       struct hdmirx_buffer, queue);
+		list_del(&buf->queue);
+		spin_unlock_irqrestore(&stream->vbq_lock, flags);
+		vb2_buffer_done(&buf->vb.vb2_buf, state);
+		spin_lock_irqsave(&stream->vbq_lock, flags);
+	}
+	spin_unlock_irqrestore(&stream->vbq_lock, flags);
+}
+
+static void hdmirx_stop_streaming(struct vb2_queue *queue)
+{
+	struct hdmirx_stream *stream = vb2_get_drv_priv(queue);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	int ret;
+
+	v4l2_info(v4l2_dev, "stream start stopping\n");
+	mutex_lock(&hdmirx_dev->stream_lock);
+	WRITE_ONCE(stream->stopping, true);
+
+	/* wait last irq to return the buffer */
+	ret = wait_event_timeout(stream->wq_stopped, !stream->stopping,
+				 msecs_to_jiffies(500));
+	if (!ret) {
+		v4l2_err(v4l2_dev, "%s: timeout waiting last irq\n",
+			 __func__);
+		WRITE_ONCE(stream->stopping, false);
+	}
+
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, HDMIRX_DMA_EN, 0);
+	return_all_buffers(stream, VB2_BUF_STATE_ERROR);
+	mutex_unlock(&hdmirx_dev->stream_lock);
+	v4l2_info(v4l2_dev, "stream stopping finished\n");
+}
+
+static int hdmirx_start_streaming(struct vb2_queue *queue, unsigned int count)
+{
+	struct hdmirx_stream *stream = vb2_get_drv_priv(queue);
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct v4l2_dv_timings timings = hdmirx_dev->timings;
+	struct v4l2_bt_timings *bt = &timings.bt;
+	unsigned long lock_flags = 0;
+	int line_flag;
+
+	if (!hdmirx_dev->got_timing) {
+		v4l2_err(v4l2_dev, "timing is invalid\n");
+		return 0;
+	}
+
+	mutex_lock(&hdmirx_dev->stream_lock);
+	stream->frame_idx = 0;
+	stream->line_flag_int_cnt = 0;
+	stream->curr_buf = NULL;
+	stream->next_buf = NULL;
+	stream->irq_stat = 0;
+	WRITE_ONCE(stream->stopping, false);
+
+	spin_lock_irqsave(&stream->vbq_lock, lock_flags);
+	if (!stream->curr_buf) {
+		if (!list_empty(&stream->buf_head)) {
+			stream->curr_buf = list_first_entry(&stream->buf_head,
+							    struct hdmirx_buffer,
+							    queue);
+			list_del(&stream->curr_buf->queue);
+		} else {
+			stream->curr_buf = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&stream->vbq_lock, lock_flags);
+
+	if (!stream->curr_buf) {
+		mutex_unlock(&hdmirx_dev->stream_lock);
+		return -ENOMEM;
+	}
+
+	v4l2_dbg(2, debug, v4l2_dev,
+		 "%s: start_stream cur_buf y_addr:%#x, uv_addr:%#x\n",
+		 __func__, stream->curr_buf->buff_addr[HDMIRX_PLANE_Y],
+		 stream->curr_buf->buff_addr[HDMIRX_PLANE_CBCR]);
+	hdmirx_writel(hdmirx_dev, DMA_CONFIG2,
+		      stream->curr_buf->buff_addr[HDMIRX_PLANE_Y]);
+	hdmirx_writel(hdmirx_dev, DMA_CONFIG3,
+		      stream->curr_buf->buff_addr[HDMIRX_PLANE_CBCR]);
+
+	if (bt->height) {
+		if (bt->interlaced == V4L2_DV_INTERLACED)
+			line_flag = bt->height / 4;
+		else
+			line_flag = bt->height / 2;
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG7,
+				   LINE_FLAG_NUM_MASK,
+				   LINE_FLAG_NUM(line_flag));
+	} else {
+		v4l2_err(v4l2_dev, "height err: %d\n", bt->height);
+	}
+
+	hdmirx_writel(hdmirx_dev, DMA_CONFIG5, 0xffffffff);
+	hdmirx_writel(hdmirx_dev, CED_DYN_CONTROL, 0x1);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG4,
+			   LINE_FLAG_INT_EN |
+			   HDMIRX_DMA_IDLE_INT |
+			   HDMIRX_LOCK_DISABLE_INT |
+			   LAST_FRAME_AXI_UNFINISH_INT_EN |
+			   FIFO_OVERFLOW_INT_EN |
+			   FIFO_UNDERFLOW_INT_EN |
+			   HDMIRX_AXI_ERROR_INT_EN,
+			   LINE_FLAG_INT_EN |
+			   HDMIRX_DMA_IDLE_INT |
+			   HDMIRX_LOCK_DISABLE_INT |
+			   LAST_FRAME_AXI_UNFINISH_INT_EN |
+			   FIFO_OVERFLOW_INT_EN |
+			   FIFO_UNDERFLOW_INT_EN |
+			   HDMIRX_AXI_ERROR_INT_EN);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, HDMIRX_DMA_EN, HDMIRX_DMA_EN);
+	v4l2_dbg(1, debug, v4l2_dev, "%s: enable dma", __func__);
+	mutex_unlock(&hdmirx_dev->stream_lock);
+
+	return 0;
+}
+
+/* vb2 queue */
+static const struct vb2_ops hdmirx_vb2_ops = {
+	.queue_setup = hdmirx_queue_setup,
+	.buf_queue = hdmirx_buf_queue,
+	.wait_prepare = vb2_ops_wait_prepare,
+	.wait_finish = vb2_ops_wait_finish,
+	.stop_streaming = hdmirx_stop_streaming,
+	.start_streaming = hdmirx_start_streaming,
+};
+
+static int hdmirx_init_vb2_queue(struct vb2_queue *q,
+				 struct hdmirx_stream *stream,
+				 enum v4l2_buf_type buf_type)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+
+	q->type = buf_type;
+	q->io_modes = VB2_MMAP | VB2_DMABUF;
+	q->drv_priv = stream;
+	q->ops = &hdmirx_vb2_ops;
+	q->mem_ops = &vb2_dma_contig_memops;
+	q->buf_struct_size = sizeof(struct hdmirx_buffer);
+	q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+	q->lock = &stream->vlock;
+	q->dev = hdmirx_dev->dev;
+	q->allow_cache_hints = 0;
+	q->bidirectional = 1;
+	q->dma_attrs = DMA_ATTR_FORCE_CONTIGUOUS;
+	return vb2_queue_init(q);
+}
+
+/* video device */
+static const struct v4l2_ioctl_ops hdmirx_v4l2_ioctl_ops = {
+	.vidioc_querycap = hdmirx_querycap,
+	.vidioc_try_fmt_vid_cap_mplane = hdmirx_g_fmt_vid_cap_mplane,
+	.vidioc_s_fmt_vid_cap_mplane = hdmirx_s_fmt_vid_cap_mplane,
+	.vidioc_g_fmt_vid_cap_mplane = hdmirx_g_fmt_vid_cap_mplane,
+	.vidioc_enum_fmt_vid_cap = hdmirx_enum_fmt_vid_cap_mplane,
+
+	.vidioc_s_dv_timings = hdmirx_s_dv_timings,
+	.vidioc_g_dv_timings = hdmirx_g_dv_timings,
+	.vidioc_enum_dv_timings = hdmirx_enum_dv_timings,
+	.vidioc_query_dv_timings = hdmirx_query_dv_timings,
+	.vidioc_dv_timings_cap = hdmirx_dv_timings_cap,
+	.vidioc_enum_input = hdmirx_enum_input,
+	.vidioc_g_input = hdmirx_get_input,
+	.vidioc_s_input = hdmirx_set_input,
+	.vidioc_g_edid = hdmirx_get_edid,
+	.vidioc_s_edid = hdmirx_set_edid,
+	.vidioc_g_parm = hdmirx_g_parm,
+
+	.vidioc_reqbufs = vb2_ioctl_reqbufs,
+	.vidioc_querybuf = vb2_ioctl_querybuf,
+	.vidioc_create_bufs = vb2_ioctl_create_bufs,
+	.vidioc_qbuf = vb2_ioctl_qbuf,
+	.vidioc_expbuf = vb2_ioctl_expbuf,
+	.vidioc_dqbuf = vb2_ioctl_dqbuf,
+	.vidioc_prepare_buf = vb2_ioctl_prepare_buf,
+	.vidioc_streamon = vb2_ioctl_streamon,
+	.vidioc_streamoff = vb2_ioctl_streamoff,
+
+	.vidioc_log_status = v4l2_ctrl_log_status,
+	.vidioc_subscribe_event = hdmirx_subscribe_event,
+	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+static const struct v4l2_file_operations hdmirx_fops = {
+	.owner = THIS_MODULE,
+	.open = v4l2_fh_open,
+	.release = vb2_fop_release,
+	.unlocked_ioctl = video_ioctl2,
+	.read = vb2_fop_read,
+	.poll = vb2_fop_poll,
+	.mmap = vb2_fop_mmap,
+};
+
+static int hdmirx_register_stream_vdev(struct hdmirx_stream *stream)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = stream->hdmirx_dev;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct video_device *vdev = &stream->vdev;
+	char *vdev_name;
+	int ret = 0;
+
+	vdev_name = "stream_hdmirx";
+	strscpy(vdev->name, vdev_name, sizeof(vdev->name));
+	INIT_LIST_HEAD(&stream->buf_head);
+	spin_lock_init(&stream->vbq_lock);
+	mutex_init(&stream->vlock);
+	init_waitqueue_head(&stream->wq_stopped);
+	stream->curr_buf = NULL;
+	stream->next_buf = NULL;
+
+	vdev->ioctl_ops = &hdmirx_v4l2_ioctl_ops;
+	vdev->release = video_device_release_empty;
+	vdev->fops = &hdmirx_fops;
+	vdev->minor = -1;
+	vdev->v4l2_dev = v4l2_dev;
+	vdev->lock = &stream->vlock;
+	vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE_MPLANE |
+			    V4L2_CAP_STREAMING;
+	video_set_drvdata(vdev, stream);
+	vdev->vfl_dir = VFL_DIR_RX;
+
+	hdmirx_init_vb2_queue(&stream->buf_queue, stream,
+			      V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
+	vdev->queue = &stream->buf_queue;
+
+	ret = video_register_device(vdev, VFL_TYPE_VIDEO, -1);
+	if (ret < 0) {
+		v4l2_err(v4l2_dev, "video_register_device failed: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void process_signal_change(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG6, HDMIRX_DMA_EN, 0);
+	hdmirx_update_bits(hdmirx_dev, DMA_CONFIG4,
+			   LINE_FLAG_INT_EN |
+			   HDMIRX_DMA_IDLE_INT |
+			   HDMIRX_LOCK_DISABLE_INT |
+			   LAST_FRAME_AXI_UNFINISH_INT_EN |
+			   FIFO_OVERFLOW_INT_EN |
+			   FIFO_UNDERFLOW_INT_EN |
+			   HDMIRX_AXI_ERROR_INT_EN, 0);
+	hdmirx_reset_dma(hdmirx_dev);
+	hdmirx_dev->got_timing = false;
+	queue_delayed_work(system_unbound_wq,
+			   &hdmirx_dev->delayed_work_res_change,
+			   msecs_to_jiffies(50));
+}
+
+static void avpunit_0_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				  int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	if (status & (CED_DYN_CNT_CH2_IRQ |
+		      CED_DYN_CNT_CH1_IRQ |
+		      CED_DYN_CNT_CH0_IRQ)) {
+		process_signal_change(hdmirx_dev);
+		v4l2_dbg(2, debug, v4l2_dev, "%s: avp0_st:%#x\n",
+			 __func__, status);
+		*handled = true;
+	}
+
+	hdmirx_clear_interrupt(hdmirx_dev, AVPUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_writel(hdmirx_dev, AVPUNIT_0_INT_FORCE, 0x0);
+}
+
+static void avpunit_1_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				  int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	if (status & DEFRAMER_VSYNC_THR_REACHED_IRQ) {
+		v4l2_info(v4l2_dev, "Vertical Sync threshold reached interrupt %#x", status);
+		hdmirx_update_bits(hdmirx_dev, AVPUNIT_1_INT_MASK_N,
+				   DEFRAMER_VSYNC_THR_REACHED_MASK_N, 0);
+		*handled = true;
+	}
+}
+
+static void mainunit_0_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				   int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	v4l2_dbg(2, debug, v4l2_dev, "mu0_st:%#x\n", status);
+	if (status & TIMER_BASE_LOCKED_IRQ) {
+		hdmirx_update_bits(hdmirx_dev, MAINUNIT_0_INT_MASK_N,
+				   TIMER_BASE_LOCKED_IRQ, 0);
+		complete(&hdmirx_dev->timer_base_lock);
+		*handled = true;
+	}
+
+	if (status & TMDSQPCLK_OFF_CHG) {
+		process_signal_change(hdmirx_dev);
+		v4l2_dbg(2, debug, v4l2_dev, "%s: TMDSQPCLK_OFF_CHG\n", __func__);
+		*handled = true;
+	}
+
+	if (status & TMDSQPCLK_LOCKED_CHG) {
+		process_signal_change(hdmirx_dev);
+		v4l2_dbg(2, debug, v4l2_dev, "%s: TMDSQPCLK_LOCKED_CHG\n", __func__);
+		*handled = true;
+	}
+
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_writel(hdmirx_dev, MAINUNIT_0_INT_FORCE, 0x0);
+}
+
+static void mainunit_2_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				   int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	v4l2_dbg(2, debug, v4l2_dev, "mu2_st:%#x\n", status);
+	if (status & PHYCREG_CR_WRITE_DONE) {
+		hdmirx_update_bits(hdmirx_dev, MAINUNIT_2_INT_MASK_N,
+				   PHYCREG_CR_WRITE_DONE, 0);
+		complete(&hdmirx_dev->cr_write_done);
+		*handled = true;
+	}
+
+	if (status & TMDSVALID_STABLE_CHG) {
+		process_signal_change(hdmirx_dev);
+		v4l2_dbg(2, debug, v4l2_dev, "%s: TMDSVALID_STABLE_CHG\n", __func__);
+		*handled = true;
+	}
+
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_writel(hdmirx_dev, MAINUNIT_2_INT_FORCE, 0x0);
+}
+
+static void pkt_2_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+			      int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	v4l2_dbg(2, debug, v4l2_dev, "%s: pk2_st:%#x\n", __func__, status);
+	if (status & PKTDEC_AVIIF_RCV_IRQ) {
+		hdmirx_update_bits(hdmirx_dev, PKT_2_INT_MASK_N,
+				   PKTDEC_AVIIF_RCV_IRQ, 0);
+		complete(&hdmirx_dev->avi_pkt_rcv);
+		v4l2_dbg(2, debug, v4l2_dev, "%s: AVIIF_RCV_IRQ\n", __func__);
+		*handled = true;
+	}
+
+	hdmirx_clear_interrupt(hdmirx_dev, PKT_2_INT_CLEAR, 0xffffffff);
+}
+
+static void scdc_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+			     int status, bool *handled)
+{
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	v4l2_dbg(2, debug, v4l2_dev, "%s: scdc_st:%#x\n", __func__, status);
+	if (status & SCDCTMDSCCFG_CHG) {
+		hdmirx_tmds_clk_ratio_config(hdmirx_dev);
+		*handled = true;
+	}
+
+	hdmirx_clear_interrupt(hdmirx_dev, SCDC_INT_CLEAR, 0xffffffff);
+}
+
+static irqreturn_t hdmirx_hdmi_irq_handler(int irq, void *dev_id)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_id;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct arm_smccc_res res;
+	u32 mu0_st, mu2_st, pk2_st, scdc_st, avp1_st, avp0_st;
+	u32 mu0_mask, mu2_mask, pk2_mask, scdc_mask, avp1_msk, avp0_msk;
+	bool handled = false;
+
+	mu0_mask = hdmirx_readl(hdmirx_dev, MAINUNIT_0_INT_MASK_N);
+	mu2_mask = hdmirx_readl(hdmirx_dev, MAINUNIT_2_INT_MASK_N);
+	pk2_mask = hdmirx_readl(hdmirx_dev, PKT_2_INT_MASK_N);
+	scdc_mask = hdmirx_readl(hdmirx_dev, SCDC_INT_MASK_N);
+	mu0_st = hdmirx_readl(hdmirx_dev, MAINUNIT_0_INT_STATUS);
+	mu2_st = hdmirx_readl(hdmirx_dev, MAINUNIT_2_INT_STATUS);
+	pk2_st = hdmirx_readl(hdmirx_dev, PKT_2_INT_STATUS);
+	scdc_st = hdmirx_readl(hdmirx_dev, SCDC_INT_STATUS);
+	avp0_st = hdmirx_readl(hdmirx_dev, AVPUNIT_0_INT_STATUS);
+	avp1_st = hdmirx_readl(hdmirx_dev, AVPUNIT_1_INT_STATUS);
+	avp0_msk = hdmirx_readl(hdmirx_dev, AVPUNIT_0_INT_MASK_N);
+	avp1_msk = hdmirx_readl(hdmirx_dev, AVPUNIT_1_INT_MASK_N);
+	mu0_st &= mu0_mask;
+	mu2_st &= mu2_mask;
+	pk2_st &= pk2_mask;
+	avp1_st &= avp1_msk;
+	avp0_st &= avp0_msk;
+	scdc_st &= scdc_mask;
+
+	if (avp0_st)
+		avpunit_0_int_handler(hdmirx_dev, avp0_st, &handled);
+	if (avp1_st)
+		avpunit_1_int_handler(hdmirx_dev, avp1_st, &handled);
+	if (mu0_st)
+		mainunit_0_int_handler(hdmirx_dev, mu0_st, &handled);
+	if (mu2_st)
+		mainunit_2_int_handler(hdmirx_dev, mu2_st, &handled);
+	if (pk2_st)
+		pkt_2_int_handler(hdmirx_dev, pk2_st, &handled);
+	if (scdc_st)
+		scdc_int_handler(hdmirx_dev, scdc_st, &handled);
+
+	if (!handled) {
+		v4l2_dbg(2, debug, v4l2_dev, "%s: hdmi irq not handled", __func__);
+		v4l2_dbg(2, debug, v4l2_dev,
+			 "avp0:%#x, avp1:%#x, mu0:%#x, mu2:%#x, pk2:%#x, scdc:%#x\n",
+			 avp0_st, avp1_st, mu0_st, mu2_st, pk2_st, scdc_st);
+	}
+
+	v4l2_dbg(2, debug, v4l2_dev, "%s: en_fiq", __func__);
+	arm_smccc_smc(RK_SIP_FIQ_CTRL, RK_SIP_FIQ_CTRL_FIQ_EN,
+		      RK_IRQ_HDMIRX_HDMI, 0, 0, 0, 0, 0, &res);
+
+	return handled ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static void hdmirx_vb_done(struct hdmirx_stream *stream,
+			   struct vb2_v4l2_buffer *vb_done)
+{
+	const struct hdmirx_output_fmt *fmt = stream->out_fmt;
+	u32 i;
+
+	/* Dequeue a filled buffer */
+	for (i = 0; i < fmt->mplanes; i++) {
+		vb2_set_plane_payload(&vb_done->vb2_buf, i,
+				      stream->pixm.plane_fmt[i].sizeimage);
+	}
+
+	vb_done->vb2_buf.timestamp = ktime_get_ns();
+	vb2_buffer_done(&vb_done->vb2_buf, VB2_BUF_STATE_DONE);
+}
+
+static void dma_idle_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				 bool *handled)
+{
+	struct hdmirx_stream *stream = &hdmirx_dev->stream;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct v4l2_dv_timings timings = hdmirx_dev->timings;
+	struct v4l2_bt_timings *bt = &timings.bt;
+	struct vb2_v4l2_buffer *vb_done = NULL;
+
+	if (!(stream->irq_stat) && !(stream->irq_stat & LINE_FLAG_INT_EN))
+		v4l2_dbg(1, debug, v4l2_dev,
+			 "%s: last time have no line_flag_irq\n", __func__);
+
+	if (stream->line_flag_int_cnt <= FILTER_FRAME_CNT)
+		goto DMA_IDLE_OUT;
+
+	if (bt->interlaced != V4L2_DV_INTERLACED ||
+	    !(stream->line_flag_int_cnt % 2)) {
+		if (stream->next_buf) {
+			if (stream->curr_buf)
+				vb_done = &stream->curr_buf->vb;
+
+			if (vb_done) {
+				vb_done->vb2_buf.timestamp = ktime_get_ns();
+				vb_done->sequence = stream->frame_idx;
+				hdmirx_vb_done(stream, vb_done);
+				stream->frame_idx++;
+				if (stream->frame_idx == 30)
+					v4l2_info(v4l2_dev, "rcv frames\n");
+			}
+
+			stream->curr_buf = NULL;
+			if (stream->next_buf) {
+				stream->curr_buf = stream->next_buf;
+				stream->next_buf = NULL;
+			}
+		} else {
+			v4l2_dbg(3, debug, v4l2_dev,
+				 "%s: next_buf NULL, skip vb_done\n", __func__);
+		}
+	}
+
+DMA_IDLE_OUT:
+	*handled = true;
+}
+
+static void line_flag_int_handler(struct snps_hdmirx_dev *hdmirx_dev,
+				  bool *handled)
+{
+	struct hdmirx_stream *stream = &hdmirx_dev->stream;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	struct v4l2_dv_timings timings = hdmirx_dev->timings;
+	struct v4l2_bt_timings *bt = &timings.bt;
+	u32 dma_cfg6;
+
+	stream->line_flag_int_cnt++;
+	if (!(stream->irq_stat) && !(stream->irq_stat & HDMIRX_DMA_IDLE_INT))
+		v4l2_dbg(1, debug, v4l2_dev,
+			 "%s: last have no dma_idle_irq\n", __func__);
+	dma_cfg6 = hdmirx_readl(hdmirx_dev, DMA_CONFIG6);
+	if (!(dma_cfg6 & HDMIRX_DMA_EN)) {
+		v4l2_dbg(2, debug, v4l2_dev, "%s: dma not on\n", __func__);
+		goto LINE_FLAG_OUT;
+	}
+
+	if (stream->line_flag_int_cnt <= FILTER_FRAME_CNT)
+		goto LINE_FLAG_OUT;
+
+	if (bt->interlaced != V4L2_DV_INTERLACED ||
+	    !(stream->line_flag_int_cnt % 2)) {
+		if (!stream->next_buf) {
+			spin_lock(&stream->vbq_lock);
+			if (!list_empty(&stream->buf_head)) {
+				stream->next_buf = list_first_entry(&stream->buf_head,
+								    struct hdmirx_buffer,
+								    queue);
+				list_del(&stream->next_buf->queue);
+			} else {
+				stream->next_buf = NULL;
+			}
+			spin_unlock(&stream->vbq_lock);
+
+			if (stream->next_buf) {
+				hdmirx_writel(hdmirx_dev, DMA_CONFIG2,
+					      stream->next_buf->buff_addr[HDMIRX_PLANE_Y]);
+				hdmirx_writel(hdmirx_dev, DMA_CONFIG3,
+					      stream->next_buf->buff_addr[HDMIRX_PLANE_CBCR]);
+			} else {
+				v4l2_dbg(3, debug, v4l2_dev,
+					 "%s: no buffer is available\n", __func__);
+			}
+		}
+	} else {
+		v4l2_dbg(3, debug, v4l2_dev, "%s: interlace:%d, line_flag_int_cnt:%d\n",
+			 __func__, bt->interlaced, stream->line_flag_int_cnt);
+	}
+
+LINE_FLAG_OUT:
+	*handled = true;
+}
+
+static irqreturn_t hdmirx_dma_irq_handler(int irq, void *dev_id)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_id;
+	struct hdmirx_stream *stream = &hdmirx_dev->stream;
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	u32 dma_stat1, dma_stat13;
+	bool handled = false;
+
+	dma_stat1 = hdmirx_readl(hdmirx_dev, DMA_STATUS1);
+	dma_stat13 = hdmirx_readl(hdmirx_dev, DMA_STATUS13);
+	v4l2_dbg(3, debug, v4l2_dev, "dma_irq st1:%#x, st13:%d\n",
+		 dma_stat1, dma_stat13);
+
+	if (READ_ONCE(stream->stopping)) {
+		v4l2_dbg(1, debug, v4l2_dev, "%s: stop stream\n", __func__);
+		hdmirx_writel(hdmirx_dev, DMA_CONFIG5, 0xffffffff);
+		hdmirx_update_bits(hdmirx_dev, DMA_CONFIG4,
+				   LINE_FLAG_INT_EN |
+				   HDMIRX_DMA_IDLE_INT |
+				   HDMIRX_LOCK_DISABLE_INT |
+				   LAST_FRAME_AXI_UNFINISH_INT_EN |
+				   FIFO_OVERFLOW_INT_EN |
+				   FIFO_UNDERFLOW_INT_EN |
+				   HDMIRX_AXI_ERROR_INT_EN, 0);
+		WRITE_ONCE(stream->stopping, false);
+		wake_up(&stream->wq_stopped);
+		return IRQ_HANDLED;
+	}
+
+	if (dma_stat1 & HDMIRX_DMA_IDLE_INT)
+		dma_idle_int_handler(hdmirx_dev, &handled);
+
+	if (dma_stat1 & LINE_FLAG_INT_EN)
+		line_flag_int_handler(hdmirx_dev, &handled);
+
+	if (!handled)
+		v4l2_dbg(3, debug, v4l2_dev,
+			 "%s: dma irq not handled, dma_stat1:%#x\n",
+			 __func__, dma_stat1);
+
+	stream->irq_stat = dma_stat1;
+	hdmirx_writel(hdmirx_dev, DMA_CONFIG5, 0xffffffff);
+
+	return IRQ_HANDLED;
+}
+
+static void hdmirx_plugin(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct arm_smccc_res res;
+	int ret;
+
+	queue_delayed_work(system_unbound_wq,
+			   &hdmirx_dev->delayed_work_heartbeat,
+			   msecs_to_jiffies(10));
+	arm_smccc_smc(SIP_WDT_CFG, WDT_START, 0, 0, 0, 0, 0, 0, &res);
+	hdmirx_submodule_init(hdmirx_dev);
+	hdmirx_update_bits(hdmirx_dev, SCDC_CONFIG, POWERPROVIDED,
+			   POWERPROVIDED);
+	hdmirx_hpd_ctrl(hdmirx_dev, true);
+	hdmirx_phy_config(hdmirx_dev);
+	ret = hdmirx_wait_lock_and_get_timing(hdmirx_dev);
+	if (ret) {
+		hdmirx_plugout(hdmirx_dev);
+		queue_delayed_work(system_unbound_wq,
+				   &hdmirx_dev->delayed_work_hotplug,
+				   msecs_to_jiffies(200));
+		return;
+	}
+	hdmirx_dma_config(hdmirx_dev);
+	hdmirx_interrupts_setup(hdmirx_dev, true);
+}
+
+static void hdmirx_delayed_work_hotplug(struct work_struct *work)
+{
+	struct snps_hdmirx_dev *hdmirx_dev;
+	bool plugin;
+
+	hdmirx_dev = container_of(work, struct snps_hdmirx_dev,
+				  delayed_work_hotplug.work);
+
+	mutex_lock(&hdmirx_dev->work_lock);
+	hdmirx_dev->got_timing = false;
+	plugin = tx_5v_power_present(hdmirx_dev);
+	v4l2_ctrl_s_ctrl(hdmirx_dev->detect_tx_5v_ctrl, plugin);
+	v4l2_dbg(1, debug, &hdmirx_dev->v4l2_dev, "%s: plugin:%d\n",
+		 __func__, plugin);
+
+	if (plugin)
+		hdmirx_plugin(hdmirx_dev);
+	else
+		hdmirx_plugout(hdmirx_dev);
+
+	mutex_unlock(&hdmirx_dev->work_lock);
+}
+
+static void hdmirx_delayed_work_res_change(struct work_struct *work)
+{
+	struct snps_hdmirx_dev *hdmirx_dev;
+	bool plugin;
+
+	hdmirx_dev = container_of(work, struct snps_hdmirx_dev,
+				  delayed_work_res_change.work);
+
+	mutex_lock(&hdmirx_dev->work_lock);
+	plugin = tx_5v_power_present(hdmirx_dev);
+	v4l2_dbg(1, debug, &hdmirx_dev->v4l2_dev, "%s: plugin:%d\n",
+		 __func__, plugin);
+	if (plugin) {
+		hdmirx_interrupts_setup(hdmirx_dev, false);
+		hdmirx_submodule_init(hdmirx_dev);
+		hdmirx_update_bits(hdmirx_dev, SCDC_CONFIG, POWERPROVIDED,
+				   POWERPROVIDED);
+		hdmirx_hpd_ctrl(hdmirx_dev, true);
+		hdmirx_phy_config(hdmirx_dev);
+
+		if (hdmirx_wait_lock_and_get_timing(hdmirx_dev)) {
+			hdmirx_plugout(hdmirx_dev);
+			queue_delayed_work(system_unbound_wq,
+					   &hdmirx_dev->delayed_work_hotplug,
+					   msecs_to_jiffies(200));
+		} else {
+			hdmirx_dma_config(hdmirx_dev);
+			hdmirx_interrupts_setup(hdmirx_dev, true);
+		}
+	}
+	mutex_unlock(&hdmirx_dev->work_lock);
+}
+
+static void hdmirx_delayed_work_heartbeat(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct snps_hdmirx_dev *hdmirx_dev = container_of(dwork,
+							struct snps_hdmirx_dev,
+							delayed_work_heartbeat);
+
+	queue_work(system_highpri_wq, &hdmirx_dev->work_wdt_config);
+	queue_delayed_work(system_unbound_wq,
+			   &hdmirx_dev->delayed_work_heartbeat, HZ);
+}
+
+static void hdmirx_work_wdt_config(struct work_struct *work)
+{
+	struct arm_smccc_res res;
+	struct snps_hdmirx_dev *hdmirx_dev = container_of(work,
+							struct snps_hdmirx_dev,
+							work_wdt_config);
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	arm_smccc_smc(SIP_WDT_CFG, WDT_PING, 0, 0, 0, 0, 0, 0, &res);
+	v4l2_dbg(3, debug, v4l2_dev, "hb\n");
+}
+
+static irqreturn_t hdmirx_5v_det_irq_handler(int irq, void *dev_id)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_id;
+	u32 val;
+
+	val = gpiod_get_value(hdmirx_dev->detect_5v_gpio);
+	v4l2_dbg(3, debug, &hdmirx_dev->v4l2_dev, "%s: 5v:%d\n", __func__, val);
+
+	queue_delayed_work(system_unbound_wq,
+			   &hdmirx_dev->delayed_work_hotplug,
+			   msecs_to_jiffies(10));
+
+	return IRQ_HANDLED;
+}
+
+static const struct hdmirx_cec_ops hdmirx_cec_ops = {
+	.write = hdmirx_writel,
+	.read = hdmirx_readl,
+};
+
+static int hdmirx_parse_dt(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	struct device *dev = hdmirx_dev->dev;
+	int ret;
+
+	hdmirx_dev->num_clks = devm_clk_bulk_get_all(dev, &hdmirx_dev->clks);
+	if (hdmirx_dev->num_clks < 1)
+		return -ENODEV;
+
+	hdmirx_dev->resets[HDMIRX_RST_A].id = "rst_a";
+	hdmirx_dev->resets[HDMIRX_RST_P].id = "rst_p";
+	hdmirx_dev->resets[HDMIRX_RST_REF].id = "rst_ref";
+	hdmirx_dev->resets[HDMIRX_RST_BIU].id = "rst_biu";
+
+	ret = devm_reset_control_bulk_get_exclusive(dev, HDMIRX_NUM_RST,
+						    hdmirx_dev->resets);
+	if (ret < 0) {
+		dev_err(dev, "failed to get reset controls\n");
+		return ret;
+	}
+
+	hdmirx_dev->detect_5v_gpio =
+		devm_gpiod_get_optional(dev, "hdmirx-5v-detection", GPIOD_IN);
+
+	if (IS_ERR(hdmirx_dev->detect_5v_gpio)) {
+		dev_err(dev, "failed to get hdmirx 5v detection gpio\n");
+		return PTR_ERR(hdmirx_dev->detect_5v_gpio);
+	}
+
+	hdmirx_dev->grf = syscon_regmap_lookup_by_phandle(dev->of_node,
+							  "rockchip,grf");
+	if (IS_ERR(hdmirx_dev->grf)) {
+		dev_err(dev, "failed to get rockchip,grf\n");
+		return PTR_ERR(hdmirx_dev->grf);
+	}
+
+	hdmirx_dev->vo1_grf = syscon_regmap_lookup_by_phandle(dev->of_node,
+							      "rockchip,vo1_grf");
+	if (IS_ERR(hdmirx_dev->vo1_grf)) {
+		dev_err(dev, "failed to get rockchip,vo1_grf\n");
+		return PTR_ERR(hdmirx_dev->vo1_grf);
+	}
+
+	hdmirx_dev->hpd_trigger_level = !device_property_read_bool(dev, "hpd-is-active-low");
+
+	ret = of_reserved_mem_device_init(dev);
+	if (ret)
+		dev_warn(dev, "No reserved memory for HDMIRX, use default CMA\n");
+
+	return 0;
+}
+
+static void hdmirx_disable_all_interrupts(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	hdmirx_writel(hdmirx_dev, MAINUNIT_0_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, MAINUNIT_1_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, MAINUNIT_2_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, AVPUNIT_0_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, AVPUNIT_1_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, PKT_0_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, PKT_1_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, PKT_2_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, SCDC_INT_MASK_N, 0);
+	hdmirx_writel(hdmirx_dev, CEC_INT_MASK_N, 0);
+
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_1_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, MAINUNIT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, AVPUNIT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, AVPUNIT_1_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, PKT_0_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, PKT_1_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, PKT_2_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, SCDC_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, HDCP_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, HDCP_1_INT_CLEAR, 0xffffffff);
+	hdmirx_clear_interrupt(hdmirx_dev, CEC_INT_CLEAR, 0xffffffff);
+}
+
+static int hdmirx_init(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	hdmirx_update_bits(hdmirx_dev, PHY_CONFIG, PHY_RESET | PHY_PDDQ, 0);
+
+	regmap_write(hdmirx_dev->vo1_grf, VO1_GRF_VO1_CON2,
+		     (HDMIRX_SDAIN_MSK | HDMIRX_SCLIN_MSK) |
+		     ((HDMIRX_SDAIN_MSK | HDMIRX_SCLIN_MSK) << 16));
+	/*
+	 * Some interrupts are enabled by default, so we disable
+	 * all interrupts and clear interrupts status first.
+	 */
+	hdmirx_disable_all_interrupts(hdmirx_dev);
+
+	return 0;
+}
+
+static void hdmirx_edid_init_config(struct snps_hdmirx_dev *hdmirx_dev)
+{
+	int ret;
+	struct v4l2_edid def_edid;
+
+	/* disable hpd and write edid */
+	def_edid.pad = 0;
+	def_edid.start_block = 0;
+	def_edid.blocks = EDID_NUM_BLOCKS_MAX;
+	if (hdmirx_dev->edid_version == HDMIRX_EDID_600M)
+		def_edid.edid = edid_init_data_600M;
+	else
+		def_edid.edid = edid_init_data_340M;
+	ret = hdmirx_write_edid(hdmirx_dev, &def_edid, false);
+	if (ret)
+		dev_err(hdmirx_dev->dev, "%s: write edid failed\n", __func__);
+}
+
+static void hdmirx_disable_irq(struct device *dev)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_get_drvdata(dev);
+	struct arm_smccc_res res;
+
+	disable_irq(hdmirx_dev->hdmi_irq);
+	disable_irq(hdmirx_dev->dma_irq);
+	disable_irq(hdmirx_dev->det_irq);
+
+	arm_smccc_smc(RK_SIP_FIQ_CTRL, RK_SIP_FIQ_CTRL_FIQ_DIS,
+		      RK_IRQ_HDMIRX_HDMI, 0, 0, 0, 0, 0, &res);
+
+	cancel_delayed_work_sync(&hdmirx_dev->delayed_work_hotplug);
+	cancel_delayed_work_sync(&hdmirx_dev->delayed_work_res_change);
+	cancel_delayed_work_sync(&hdmirx_dev->delayed_work_heartbeat);
+	flush_work(&hdmirx_dev->work_wdt_config);
+
+	arm_smccc_smc(SIP_WDT_CFG, WDT_STOP, 0, 0, 0, 0, 0, 0, &res);
+}
+
+static int hdmirx_disable(struct device *dev)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_get_drvdata(dev);
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+
+	clk_bulk_disable_unprepare(hdmirx_dev->num_clks, hdmirx_dev->clks);
+
+	v4l2_dbg(2, debug, v4l2_dev, "%s: suspend\n", __func__);
+
+	return pinctrl_pm_select_sleep_state(dev);
+}
+
+static void hdmirx_enable_irq(struct device *dev)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_get_drvdata(dev);
+	struct arm_smccc_res res;
+
+	enable_irq(hdmirx_dev->hdmi_irq);
+	enable_irq(hdmirx_dev->dma_irq);
+	enable_irq(hdmirx_dev->det_irq);
+
+	arm_smccc_smc(RK_SIP_FIQ_CTRL, RK_SIP_FIQ_CTRL_FIQ_EN,
+		      RK_IRQ_HDMIRX_HDMI, 0, 0, 0, 0, 0, &res);
+
+	queue_delayed_work(system_unbound_wq, &hdmirx_dev->delayed_work_hotplug,
+			   msecs_to_jiffies(20));
+}
+
+static int hdmirx_enable(struct device *dev)
+{
+	struct snps_hdmirx_dev *hdmirx_dev = dev_get_drvdata(dev);
+	struct v4l2_device *v4l2_dev = &hdmirx_dev->v4l2_dev;
+	int ret;
+
+	v4l2_dbg(2, debug, v4l2_dev, "%s: resume\n", __func__);
+	ret = pinctrl_pm_select_default_state(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = clk_bulk_prepare_enable(hdmirx_dev->num_clks, hdmirx_dev->clks);
+	if (ret) {
+		dev_err(dev, "failed to enable hdmirx bulk clks: %d\n", ret);
+		return ret;
+	}
+
+	reset_control_bulk_assert(HDMIRX_NUM_RST, hdmirx_dev->resets);
+	usleep_range(150, 160);
+	reset_control_bulk_deassert(HDMIRX_NUM_RST, hdmirx_dev->resets);
+	usleep_range(150, 160);
+
+	hdmirx_edid_init_config(hdmirx_dev);
+
+	return 0;
+}
+
+static int hdmirx_suspend(struct device *dev)
+{
+	hdmirx_disable_irq(dev);
+
+	return hdmirx_disable(dev);
+}
+
+static int hdmirx_resume(struct device *dev)
+{
+	int ret = hdmirx_enable(dev);
+
+	if (ret)
+		return ret;
+
+	hdmirx_enable_irq(dev);
+
+	return 0;
+}
+
+static const struct dev_pm_ops snps_hdmirx_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(hdmirx_suspend, hdmirx_resume)
+};
+
+static int hdmirx_setup_irq(struct snps_hdmirx_dev *hdmirx_dev,
+			    struct platform_device *pdev)
+{
+	struct device *dev = hdmirx_dev->dev;
+	int ret, irq;
+
+	irq = platform_get_irq_byname(pdev, "hdmi");
+	if (irq < 0) {
+		dev_err_probe(dev, irq, "failed to get hdmi irq\n");
+		return irq;
+	}
+
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+	hdmirx_dev->hdmi_irq = irq;
+	ret = devm_request_irq(dev, irq, hdmirx_hdmi_irq_handler, 0,
+			       "rk_hdmirx-hdmi", hdmirx_dev);
+	if (ret) {
+		dev_err_probe(dev, ret, "failed to request hdmi irq\n");
+		return ret;
+	}
+
+	irq = platform_get_irq_byname(pdev, "dma");
+	if (irq < 0) {
+		dev_err_probe(dev, irq, "failed to get dma irq\n");
+		return irq;
+	}
+
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+	hdmirx_dev->dma_irq = irq;
+	ret = devm_request_threaded_irq(dev, irq, NULL, hdmirx_dma_irq_handler,
+					IRQF_ONESHOT, "rk_hdmirx-dma",
+					hdmirx_dev);
+	if (ret) {
+		dev_err_probe(dev, ret, "failed to request dma irq\n");
+		return ret;
+	}
+
+	irq = gpiod_to_irq(hdmirx_dev->detect_5v_gpio);
+	if (irq < 0) {
+		dev_err_probe(dev, irq, "failed to get hdmirx-5v irq\n");
+		return irq;
+	}
+
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+
+	hdmirx_dev->det_irq = irq;
+	ret = devm_request_irq(dev, irq, hdmirx_5v_det_irq_handler,
+			       IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+			       "rk_hdmirx-5v", hdmirx_dev);
+	if (ret) {
+		dev_err_probe(dev, ret, "failed to request hdmirx-5v irq\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int hdmirx_register_cec(struct snps_hdmirx_dev *hdmirx_dev,
+			       struct platform_device *pdev)
+{
+	struct device *dev = hdmirx_dev->dev;
+	struct hdmirx_cec_data cec_data;
+	int irq;
+
+	irq = platform_get_irq_byname(pdev, "cec");
+	if (irq < 0) {
+		dev_err_probe(dev, irq, "failed to get cec irq\n");
+		return irq;
+	}
+
+	hdmirx_dev->cec_notifier = cec_notifier_conn_register(dev, NULL, NULL);
+	if (!hdmirx_dev->cec_notifier)
+		return -EINVAL;
+
+	cec_data.hdmirx = hdmirx_dev;
+	cec_data.dev = hdmirx_dev->dev;
+	cec_data.ops = &hdmirx_cec_ops;
+	cec_data.irq = irq;
+	cec_data.edid = edid_init_data_340M;
+
+	hdmirx_dev->cec = snps_hdmirx_cec_register(&cec_data);
+	if (!hdmirx_dev->cec) {
+		cec_notifier_conn_unregister(hdmirx_dev->cec_notifier);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hdmirx_probe(struct platform_device *pdev)
+{
+	struct snps_hdmirx_dev *hdmirx_dev;
+	struct device *dev = &pdev->dev;
+	struct v4l2_ctrl_handler *hdl;
+	struct hdmirx_stream *stream;
+	struct v4l2_device *v4l2_dev;
+	int ret;
+
+	hdmirx_dev = devm_kzalloc(dev, sizeof(*hdmirx_dev), GFP_KERNEL);
+	if (!hdmirx_dev)
+		return -ENOMEM;
+
+	ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret)
+		return ret;
+
+	hdmirx_dev->dev = dev;
+	dev_set_drvdata(dev, hdmirx_dev);
+	hdmirx_dev->edid_version = HDMIRX_EDID_340M;
+
+	ret = hdmirx_parse_dt(hdmirx_dev);
+	if (ret)
+		return ret;
+
+	ret = hdmirx_setup_irq(hdmirx_dev, pdev);
+	if (ret)
+		return ret;
+
+	hdmirx_dev->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(hdmirx_dev->regs))
+		return dev_err_probe(dev, PTR_ERR(hdmirx_dev->regs),
+				     "failed to remap regs resource\n");
+
+	mutex_init(&hdmirx_dev->stream_lock);
+	mutex_init(&hdmirx_dev->work_lock);
+	spin_lock_init(&hdmirx_dev->rst_lock);
+
+	init_completion(&hdmirx_dev->cr_write_done);
+	init_completion(&hdmirx_dev->timer_base_lock);
+	init_completion(&hdmirx_dev->avi_pkt_rcv);
+
+	INIT_WORK(&hdmirx_dev->work_wdt_config, hdmirx_work_wdt_config);
+	INIT_DELAYED_WORK(&hdmirx_dev->delayed_work_hotplug,
+			  hdmirx_delayed_work_hotplug);
+	INIT_DELAYED_WORK(&hdmirx_dev->delayed_work_res_change,
+			  hdmirx_delayed_work_res_change);
+	INIT_DELAYED_WORK(&hdmirx_dev->delayed_work_heartbeat,
+			  hdmirx_delayed_work_heartbeat);
+
+	hdmirx_dev->cur_fmt_fourcc = V4L2_PIX_FMT_BGR24;
+	hdmirx_dev->timings = cea640x480;
+
+	hdmirx_enable(dev);
+	hdmirx_init(hdmirx_dev);
+
+	v4l2_dev = &hdmirx_dev->v4l2_dev;
+	strscpy(v4l2_dev->name, dev_name(dev), sizeof(v4l2_dev->name));
+
+	hdl = &hdmirx_dev->hdl;
+	v4l2_ctrl_handler_init(hdl, 1);
+	hdmirx_dev->detect_tx_5v_ctrl = v4l2_ctrl_new_std(hdl, NULL,
+							  V4L2_CID_DV_RX_POWER_PRESENT,
+							  0, 1, 0, 0);
+	if (hdl->error) {
+		dev_err(dev, "v4l2 ctrl handler init failed\n");
+		ret = hdl->error;
+		goto err_pm;
+	}
+	hdmirx_dev->v4l2_dev.ctrl_handler = hdl;
+
+	ret = v4l2_device_register(dev, &hdmirx_dev->v4l2_dev);
+	if (ret < 0) {
+		dev_err(dev, "register v4l2 device failed\n");
+		goto err_hdl;
+	}
+
+	stream = &hdmirx_dev->stream;
+	stream->hdmirx_dev = hdmirx_dev;
+	ret = hdmirx_register_stream_vdev(stream);
+	if (ret < 0) {
+		dev_err(dev, "register video device failed\n");
+		goto err_unreg_v4l2_dev;
+	}
+
+	ret = hdmirx_register_cec(hdmirx_dev, pdev);
+	if (ret)
+		goto err_unreg_video_dev;
+
+	hdmirx_enable_irq(dev);
+
+	return 0;
+
+err_unreg_video_dev:
+	video_unregister_device(&hdmirx_dev->stream.vdev);
+err_unreg_v4l2_dev:
+	v4l2_device_unregister(&hdmirx_dev->v4l2_dev);
+err_hdl:
+	v4l2_ctrl_handler_free(&hdmirx_dev->hdl);
+err_pm:
+	hdmirx_disable(dev);
+
+	return ret;
+}
+
+static int hdmirx_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct snps_hdmirx_dev *hdmirx_dev = dev_get_drvdata(dev);
+
+	snps_hdmirx_cec_unregister(hdmirx_dev->cec);
+	cec_notifier_conn_unregister(hdmirx_dev->cec_notifier);
+
+	hdmirx_disable_irq(dev);
+
+	video_unregister_device(&hdmirx_dev->stream.vdev);
+	v4l2_ctrl_handler_free(&hdmirx_dev->hdl);
+	v4l2_device_unregister(&hdmirx_dev->v4l2_dev);
+
+	hdmirx_disable(dev);
+
+	reset_control_bulk_assert(HDMIRX_NUM_RST, hdmirx_dev->resets);
+
+	of_reserved_mem_device_release(dev);
+
+	return 0;
+}
+
+static const struct of_device_id hdmirx_id[] = {
+	{ .compatible = "rockchip,rk3588-hdmirx-ctrler" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, hdmirx_id);
+
+static struct platform_driver hdmirx_driver = {
+	.probe = hdmirx_probe,
+	.remove = hdmirx_remove,
+	.driver = {
+		.name = "snps_hdmirx",
+		.of_match_table = hdmirx_id,
+		.pm = &snps_hdmirx_pm_ops,
+	}
+};
+module_platform_driver(hdmirx_driver);
+
+MODULE_DESCRIPTION("Rockchip HDMI Receiver Driver");
+MODULE_AUTHOR("Dingxian Wen <shawn.wen@rock-chips.com>");
+MODULE_AUTHOR("Shreeya Patel <shreeya.patel@collabora.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.h b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.h
new file mode 100644
index 000000000000..220ab99ca611
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx.h
@@ -0,0 +1,394 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd.
+ *
+ * Author: Dingxian Wen <shawn.wen@rock-chips.com>
+ */
+
+#ifndef DW_HDMIRX_H
+#define DW_HDMIRX_H
+
+#include <linux/bitops.h>
+
+#define UPDATE(x, h, l)		(((x) << (l)) & GENMASK((h), (l)))
+#define HIWORD_UPDATE(v, h, l)	(((v) << (l)) | (GENMASK((h), (l)) << 16))
+
+/* SYS_GRF */
+#define SYS_GRF_SOC_CON1			0x0304
+#define HDMIRXPHY_SRAM_EXT_LD_DONE		BIT(1)
+#define HDMIRXPHY_SRAM_BYPASS			BIT(0)
+#define SYS_GRF_SOC_STATUS1			0x0384
+#define HDMIRXPHY_SRAM_INIT_DONE		BIT(10)
+#define SYS_GRF_CHIP_ID				0x0600
+
+/* VO1_GRF */
+#define VO1_GRF_VO1_CON2			0x0008
+#define HDMIRX_SDAIN_MSK			BIT(2)
+#define HDMIRX_SCLIN_MSK			BIT(1)
+
+/* HDMIRX PHY */
+#define SUP_DIG_ANA_CREGS_SUP_ANA_NC			0x004f
+
+#define	LANE0_DIG_ASIC_RX_OVRD_OUT_0			0x100f
+#define	LANE1_DIG_ASIC_RX_OVRD_OUT_0			0x110f
+#define	LANE2_DIG_ASIC_RX_OVRD_OUT_0			0x120f
+#define	LANE3_DIG_ASIC_RX_OVRD_OUT_0			0x130f
+#define ASIC_ACK_OVRD_EN				BIT(1)
+#define ASIC_ACK					BIT(0)
+
+#define	LANE0_DIG_RX_VCOCAL_RX_VCO_CAL_CTRL_2		0x104a
+#define	LANE1_DIG_RX_VCOCAL_RX_VCO_CAL_CTRL_2		0x114a
+#define	LANE2_DIG_RX_VCOCAL_RX_VCO_CAL_CTRL_2		0x124a
+#define	LANE3_DIG_RX_VCOCAL_RX_VCO_CAL_CTRL_2		0x134a
+#define FREQ_TUNE_START_VAL_MASK			GENMASK(9, 0)
+#define FREQ_TUNE_START_VAL(x)				UPDATE(x, 9, 0)
+
+#define	HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_FSM_CONFIG	0x20c4
+#define	HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_ADAPT_REF_FOM	0x20c7
+#define HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_3_REG	0x20e9
+#define CDR_SETTING_BOUNDARY_3_DEFAULT			0x52da
+#define HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_4_REG	0x20ea
+#define CDR_SETTING_BOUNDARY_4_DEFAULT			0x43cd
+#define HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_5_REG	0x20eb
+#define CDR_SETTING_BOUNDARY_5_DEFAULT			0x35b3
+#define HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_6_REG	0x20fb
+#define	CDR_SETTING_BOUNDARY_6_DEFAULT			0x2799
+#define HDMIPCS_DIG_CTRL_PATH_MAIN_FSM_RATE_CALC_HDMI14_CDR_SETTING_7_REG	0x20fc
+#define CDR_SETTING_BOUNDARY_7_DEFAULT			0x1b65
+
+#define	RAWLANE0_DIG_PCS_XF_RX_OVRD_OUT			0x300e
+#define	RAWLANE1_DIG_PCS_XF_RX_OVRD_OUT			0x310e
+#define	RAWLANE2_DIG_PCS_XF_RX_OVRD_OUT			0x320e
+#define	RAWLANE3_DIG_PCS_XF_RX_OVRD_OUT			0x330e
+#define PCS_ACK_WRITE_SELECT				BIT(14)
+#define PCS_EN_CTL					BIT(1)
+#define PCS_ACK						BIT(0)
+
+#define	RAWLANE0_DIG_AON_FAST_FLAGS			0x305c
+#define	RAWLANE1_DIG_AON_FAST_FLAGS			0x315c
+#define	RAWLANE2_DIG_AON_FAST_FLAGS			0x325c
+#define	RAWLANE3_DIG_AON_FAST_FLAGS			0x335c
+
+/* HDMIRX Ctrler */
+#define GLOBAL_SWRESET_REQUEST			0x0020
+#define DATAPATH_SWRESETREQ			BIT(12)
+#define GLOBAL_SWENABLE				0x0024
+#define PHYCTRL_ENABLE				BIT(21)
+#define CEC_ENABLE				BIT(16)
+#define TMDS_ENABLE				BIT(13)
+#define DATAPATH_ENABLE				BIT(12)
+#define PKTFIFO_ENABLE				BIT(11)
+#define AVPUNIT_ENABLE				BIT(8)
+#define MAIN_ENABLE				BIT(0)
+#define GLOBAL_TIMER_REF_BASE			0x0028
+#define CORE_CONFIG				0x0050
+#define CMU_CONFIG0				0x0060
+#define TMDSQPCLK_STABLE_FREQ_MARGIN_MASK	GENMASK(30, 16)
+#define TMDSQPCLK_STABLE_FREQ_MARGIN(x)		UPDATE(x, 30, 16)
+#define AUDCLK_STABLE_FREQ_MARGIN_MASK		GENMASK(11, 9)
+#define AUDCLK_STABLE_FREQ_MARGIN(x)		UPDATE(x, 11, 9)
+#define CMU_STATUS				0x007c
+#define TMDSQPCLK_LOCKED_ST			BIT(4)
+#define CMU_TMDSQPCLK_FREQ			0x0084
+#define PHY_CONFIG				0x00c0
+#define LDO_AFE_PROG_MASK			GENMASK(24, 23)
+#define LDO_AFE_PROG(x)				UPDATE(x, 24, 23)
+#define LDO_PWRDN				BIT(21)
+#define TMDS_CLOCK_RATIO			BIT(16)
+#define RXDATA_WIDTH				BIT(15)
+#define REFFREQ_SEL_MASK			GENMASK(11, 9)
+#define REFFREQ_SEL(x)				UPDATE(x, 11, 9)
+#define HDMI_DISABLE				BIT(8)
+#define PHY_PDDQ				BIT(1)
+#define PHY_RESET				BIT(0)
+#define PHY_STATUS				0x00c8
+#define HDMI_DISABLE_ACK			BIT(1)
+#define PDDQ_ACK				BIT(0)
+#define PHYCREG_CONFIG0				0x00e0
+#define PHYCREG_CR_PARA_SELECTION_MODE_MASK	GENMASK(1, 0)
+#define PHYCREG_CR_PARA_SELECTION_MODE(x)	UPDATE(x, 1, 0)
+#define PHYCREG_CONFIG1				0x00e4
+#define PHYCREG_CONFIG2				0x00e8
+#define PHYCREG_CONFIG3				0x00ec
+#define PHYCREG_CONTROL				0x00f0
+#define PHYCREG_CR_PARA_WRITE_P			BIT(1)
+#define PHYCREG_CR_PARA_READ_P			BIT(0)
+#define PHYCREG_STATUS				0x00f4
+
+#define MAINUNIT_STATUS				0x0150
+#define TMDSVALID_STABLE_ST			BIT(1)
+#define DESCRAND_EN_CONTROL			0x0210
+#define SCRAMB_EN_SEL_QST_MASK			GENMASK(1, 0)
+#define SCRAMB_EN_SEL_QST(x)			UPDATE(x, 1, 0)
+#define DESCRAND_SYNC_CONTROL			0x0214
+#define RECOVER_UNSYNC_STREAM_QST		BIT(0)
+#define DESCRAND_SYNC_SEQ_CONFIG		0x022c
+#define DESCRAND_SYNC_SEQ_ERR_CNT_EN		BIT(0)
+#define DESCRAND_SYNC_SEQ_STATUS		0x0234
+#define DEFRAMER_CONFIG0			0x0270
+#define VS_CNT_THR_QST_MASK			GENMASK(27, 20)
+#define VS_CNT_THR_QST(x)			UPDATE(x, 27, 20)
+#define HS_POL_QST_MASK				GENMASK(19, 18)
+#define HS_POL_QST(x)				UPDATE(x, 19, 18)
+#define VS_POL_QST_MASK				GENMASK(17, 16)
+#define VS_POL_QST(x)				UPDATE(x, 17, 16)
+#define VS_REMAPFILTER_EN_QST			BIT(8)
+#define VS_FILTER_ORDER_QST_MASK		GENMASK(1, 0)
+#define VS_FILTER_ORDER_QST(x)			UPDATE(x, 1, 0)
+#define DEFRAMER_VSYNC_CNT_CLEAR		0x0278
+#define VSYNC_CNT_CLR_P				BIT(0)
+#define DEFRAMER_STATUS				0x027c
+#define OPMODE_STS_MASK				GENMASK(6, 4)
+#define I2C_SLAVE_CONFIG1			0x0164
+#define I2C_SDA_OUT_HOLD_VALUE_QST_MASK		GENMASK(15, 8)
+#define I2C_SDA_OUT_HOLD_VALUE_QST(x)		UPDATE(x, 15, 8)
+#define I2C_SDA_IN_HOLD_VALUE_QST_MASK		GENMASK(7, 0)
+#define I2C_SDA_IN_HOLD_VALUE_QST(x)		UPDATE(x, 7, 0)
+#define OPMODE_STS_MASK				GENMASK(6, 4)
+#define REPEATER_QST				BIT(28)
+#define FASTREAUTH_QST				BIT(27)
+#define FEATURES_1DOT1_QST			BIT(26)
+#define FASTI2C_QST				BIT(25)
+#define EESS_CTL_THR_QST_MASK			GENMASK(19, 16)
+#define EESS_CTL_THR_QST(x)			UPDATE(x, 19, 16)
+#define OESS_CTL3_THR_QST_MASK			GENMASK(11, 8)
+#define OESS_CTL3_THR_QST(x)			UPDATE(x, 11, 8)
+#define EESS_OESS_SEL_QST_MASK			GENMASK(5, 4)
+#define EESS_OESS_SEL_QST(x)			UPDATE(x, 5, 4)
+#define KEY_DECRYPT_EN_QST			BIT(0)
+#define KEY_DECRYPT_SEED_QST_MASK		GENMASK(15, 0)
+#define KEY_DECRYPT_SEED_QST(x)			UPDATE(x, 15, 0)
+#define HDCP_INT_CLEAR				0x50d8
+#define HDCP_1_INT_CLEAR			0x50e8
+#define HDCP2_CONFIG				0x02f0
+#define HDCP2_SWITCH_OVR_VALUE			BIT(2)
+#define HDCP2_SWITCH_OVR_EN			BIT(1)
+
+#define VIDEO_CONFIG2				0x042c
+#define VPROC_VSYNC_POL_OVR_VALUE		BIT(19)
+#define VPROC_VSYNC_POL_OVR_EN			BIT(18)
+#define VPROC_HSYNC_POL_OVR_VALUE		BIT(17)
+#define VPROC_HSYNC_POL_OVR_EN			BIT(16)
+#define VPROC_FMT_OVR_VALUE_MASK		GENMASK(6, 4)
+#define VPROC_FMT_OVR_VALUE(x)			UPDATE(x, 6, 4)
+#define VPROC_FMT_OVR_EN			BIT(0)
+
+#define AFIFO_FILL_RESTART			BIT(0)
+#define AFIFO_INIT_P				BIT(0)
+#define AFIFO_THR_LOW_QST_MASK			GENMASK(25, 16)
+#define AFIFO_THR_LOW_QST(x)			UPDATE(x, 25, 16)
+#define AFIFO_THR_HIGH_QST_MASK			GENMASK(9, 0)
+#define AFIFO_THR_HIGH_QST(x)			UPDATE(x, 9, 0)
+#define AFIFO_THR_MUTE_LOW_QST_MASK		GENMASK(25, 16)
+#define AFIFO_THR_MUTE_LOW_QST(x)		UPDATE(x, 25, 16)
+#define AFIFO_THR_MUTE_HIGH_QST_MASK		GENMASK(9, 0)
+#define AFIFO_THR_MUTE_HIGH_QST(x)		UPDATE(x, 9, 0)
+
+#define AFIFO_UNDERFLOW_ST			BIT(25)
+#define AFIFO_OVERFLOW_ST			BIT(24)
+
+#define SPEAKER_ALLOC_OVR_EN			BIT(16)
+#define I2S_BPCUV_EN				BIT(4)
+#define SPDIF_EN				BIT(2)
+#define I2S_EN					BIT(1)
+#define AFIFO_THR_PASS_DEMUTEMASK_N		BIT(24)
+#define AVMUTE_DEMUTEMASK_N			BIT(16)
+#define AFIFO_THR_MUTE_LOW_MUTEMASK_N		BIT(9)
+#define AFIFO_THR_MUTE_HIGH_MUTEMASK_N		BIT(8)
+#define AVMUTE_MUTEMASK_N			BIT(0)
+#define SCDC_CONFIG				0x0580
+#define HPDLOW					BIT(1)
+#define POWERPROVIDED				BIT(0)
+#define SCDC_REGBANK_STATUS1			0x058c
+#define SCDC_TMDSBITCLKRATIO			BIT(1)
+#define SCDC_REGBANK_STATUS3			0x0594
+#define SCDC_REGBANK_CONFIG0			0x05c0
+#define SCDC_SINKVERSION_QST_MASK		GENMASK(7, 0)
+#define SCDC_SINKVERSION_QST(x)			UPDATE(x, 7, 0)
+#define AGEN_LAYOUT				BIT(4)
+#define AGEN_SPEAKER_ALLOC			GENMASK(15, 8)
+
+#define CED_CONFIG				0x0760
+#define CED_VIDDATACHECKEN_QST			BIT(27)
+#define CED_DATAISCHECKEN_QST			BIT(26)
+#define CED_GBCHECKEN_QST			BIT(25)
+#define CED_CTRLCHECKEN_QST			BIT(24)
+#define CED_CHLOCKMAXER_QST_MASK		GENMASK(14, 0)
+#define CED_CHLOCKMAXER_QST(x)			UPDATE(x, 14, 0)
+#define CED_DYN_CONFIG				0x0768
+#define CED_DYN_CONTROL				0x076c
+#define PKTEX_BCH_ERRFILT_CONFIG		0x07c4
+#define PKTEX_CHKSUM_ERRFILT_CONFIG		0x07c8
+
+#define PKTDEC_ACR_PH2_1			0x1100
+#define PKTDEC_ACR_PB3_0			0x1104
+#define PKTDEC_ACR_PB7_4			0x1108
+#define PKTDEC_AVIIF_PH2_1			0x1200
+#define PKTDEC_AVIIF_PB3_0			0x1204
+#define PKTDEC_AVIIF_PB7_4			0x1208
+#define VIC_VAL_MASK				GENMASK(6, 0)
+#define PKTDEC_AVIIF_PB11_8			0x120c
+#define PKTDEC_AVIIF_PB15_12			0x1210
+#define PKTDEC_AVIIF_PB19_16			0x1214
+#define PKTDEC_AVIIF_PB23_20			0x1218
+#define PKTDEC_AVIIF_PB27_24			0x121c
+
+#define PKTFIFO_CONFIG				0x1500
+#define PKTFIFO_STORE_FILT_CONFIG		0x1504
+#define PKTFIFO_THR_CONFIG0			0x1508
+#define PKTFIFO_THR_CONFIG1			0x150c
+#define PKTFIFO_CONTROL				0x1510
+
+#define VMON_STATUS1				0x1580
+#define VMON_STATUS2				0x1584
+#define VMON_STATUS3				0x1588
+#define VMON_STATUS4				0x158c
+#define VMON_STATUS5				0x1590
+#define VMON_STATUS6				0x1594
+#define VMON_STATUS7				0x1598
+#define VMON_ILACE_DETECT			BIT(4)
+
+#define CEC_TX_CONTROL				0x2000
+#define CEC_STATUS				0x2004
+#define CEC_CONFIG				0x2008
+#define RX_AUTO_DRIVE_ACKNOWLEDGE		BIT(9)
+#define CEC_ADDR				0x200c
+#define CEC_TX_COUNT				0x2020
+#define CEC_TX_DATA3_0				0x2024
+#define CEC_RX_COUNT_STATUS			0x2040
+#define CEC_RX_DATA3_0				0x2044
+#define CEC_LOCK_CONTROL			0x2054
+#define CEC_RXQUAL_BITTIME_CONFIG		0x2060
+#define CEC_RX_BITTIME_CONFIG			0x2064
+#define CEC_TX_BITTIME_CONFIG			0x2068
+
+#define DMA_CONFIG1				0x4400
+#define UV_WID_MASK				GENMASK(31, 28)
+#define UV_WID(x)				UPDATE(x, 31, 28)
+#define Y_WID_MASK				GENMASK(27, 24)
+#define Y_WID(x)				UPDATE(x, 27, 24)
+#define DDR_STORE_FORMAT_MASK			GENMASK(15, 12)
+#define DDR_STORE_FORMAT(x)			UPDATE(x, 15, 12)
+#define ABANDON_EN				BIT(0)
+#define DMA_CONFIG2				0x4404
+#define DMA_CONFIG3				0x4408
+#define DMA_CONFIG4				0x440c // dma irq en
+#define DMA_CONFIG5				0x4410 // dma irq clear status
+#define LINE_FLAG_INT_EN			BIT(8)
+#define HDMIRX_DMA_IDLE_INT			BIT(7)
+#define HDMIRX_LOCK_DISABLE_INT			BIT(6)
+#define LAST_FRAME_AXI_UNFINISH_INT_EN		BIT(5)
+#define FIFO_OVERFLOW_INT_EN			BIT(2)
+#define FIFO_UNDERFLOW_INT_EN			BIT(1)
+#define HDMIRX_AXI_ERROR_INT_EN			BIT(0)
+#define DMA_CONFIG6				0x4414
+#define RB_SWAP_EN				BIT(9)
+#define HSYNC_TOGGLE_EN				BIT(5)
+#define VSYNC_TOGGLE_EN				BIT(4)
+#define HDMIRX_DMA_EN				BIT(1)
+#define DMA_CONFIG7				0x4418
+#define LINE_FLAG_NUM_MASK			GENMASK(31, 16)
+#define LINE_FLAG_NUM(x)			UPDATE(x, 31, 16)
+#define LOCK_FRAME_NUM_MASK			GENMASK(11, 0)
+#define LOCK_FRAME_NUM(x)			UPDATE(x, 11, 0)
+#define DMA_CONFIG8				0x441c
+#define REG_MIRROR_EN				BIT(0)
+#define DMA_CONFIG9				0x4420
+#define DMA_CONFIG10				0x4424
+#define DMA_CONFIG11				0x4428
+#define EDID_READ_EN_MASK			BIT(8)
+#define EDID_READ_EN(x)				UPDATE(x, 8, 8)
+#define EDID_WRITE_EN_MASK			BIT(7)
+#define EDID_WRITE_EN(x)			UPDATE(x, 7, 7)
+#define EDID_SLAVE_ADDR_MASK			GENMASK(6, 0)
+#define EDID_SLAVE_ADDR(x)			UPDATE(x, 6, 0)
+#define DMA_STATUS1				0x4430 // dma irq status
+#define DMA_STATUS2				0x4434
+#define DMA_STATUS3				0x4438
+#define DMA_STATUS4				0x443c
+#define DMA_STATUS5				0x4440
+#define DMA_STATUS6				0x4444
+#define DMA_STATUS7				0x4448
+#define DMA_STATUS8				0x444c
+#define DMA_STATUS9				0x4450
+#define DMA_STATUS10				0x4454
+#define HDMIRX_LOCK				BIT(3)
+#define DMA_STATUS11				0x4458
+#define HDMIRX_TYPE_MASK			GENMASK(8, 7)
+#define HDMIRX_COLOR_DEPTH_MASK			GENMASK(6, 3)
+#define HDMIRX_FORMAT_MASK			GENMASK(2, 0)
+#define DMA_STATUS12				0x445c
+#define DMA_STATUS13				0x4460
+#define DMA_STATUS14				0x4464
+
+#define MAINUNIT_INTVEC_INDEX			0x5000
+#define MAINUNIT_0_INT_STATUS			0x5010
+#define CECRX_NOTIFY_ERR			BIT(12)
+#define CECRX_EOM				BIT(11)
+#define CECTX_DRIVE_ERR				BIT(10)
+#define CECRX_BUSY				BIT(9)
+#define CECTX_BUSY				BIT(8)
+#define CECTX_FRAME_DISCARDED			BIT(5)
+#define CECTX_NRETRANSMIT_FAIL			BIT(4)
+#define CECTX_LINE_ERR				BIT(3)
+#define CECTX_ARBLOST				BIT(2)
+#define CECTX_NACK				BIT(1)
+#define CECTX_DONE				BIT(0)
+#define MAINUNIT_0_INT_MASK_N			0x5014
+#define MAINUNIT_0_INT_CLEAR			0x5018
+#define MAINUNIT_0_INT_FORCE			0x501c
+#define TIMER_BASE_LOCKED_IRQ			BIT(26)
+#define TMDSQPCLK_OFF_CHG			BIT(5)
+#define TMDSQPCLK_LOCKED_CHG			BIT(4)
+#define MAINUNIT_1_INT_STATUS			0x5020
+#define MAINUNIT_1_INT_MASK_N			0x5024
+#define MAINUNIT_1_INT_CLEAR			0x5028
+#define MAINUNIT_1_INT_FORCE			0x502c
+#define MAINUNIT_2_INT_STATUS			0x5030
+#define MAINUNIT_2_INT_MASK_N			0x5034
+#define MAINUNIT_2_INT_CLEAR			0x5038
+#define MAINUNIT_2_INT_FORCE			0x503c
+#define PHYCREG_CR_READ_DONE			BIT(11)
+#define PHYCREG_CR_WRITE_DONE			BIT(10)
+#define TMDSVALID_STABLE_CHG			BIT(1)
+
+#define AVPUNIT_0_INT_STATUS			0x5040
+#define AVPUNIT_0_INT_MASK_N			0x5044
+#define AVPUNIT_0_INT_CLEAR			0x5048
+#define AVPUNIT_0_INT_FORCE			0x504c
+#define CED_DYN_CNT_CH2_IRQ			BIT(22)
+#define CED_DYN_CNT_CH1_IRQ			BIT(21)
+#define CED_DYN_CNT_CH0_IRQ			BIT(20)
+#define AVPUNIT_1_INT_STATUS			0x5050
+#define DEFRAMER_VSYNC_THR_REACHED_IRQ		BIT(1)
+#define AVPUNIT_1_INT_MASK_N			0x5054
+#define DEFRAMER_VSYNC_THR_REACHED_MASK_N	BIT(1)
+#define DEFRAMER_VSYNC_MASK_N			BIT(0)
+#define AVPUNIT_1_INT_CLEAR			0x5058
+#define DEFRAMER_VSYNC_THR_REACHED_CLEAR	BIT(1)
+#define PKT_0_INT_STATUS			0x5080
+#define PKTDEC_ACR_CHG_IRQ			BIT(3)
+#define PKT_0_INT_MASK_N			0x5084
+#define PKTDEC_ACR_CHG_MASK_N			BIT(3)
+#define PKT_0_INT_CLEAR				0x5088
+#define PKT_1_INT_STATUS			0x5090
+#define PKT_1_INT_MASK_N			0x5094
+#define PKT_1_INT_CLEAR				0x5098
+#define PKT_2_INT_STATUS			0x50a0
+#define PKTDEC_ACR_RCV_IRQ			BIT(3)
+#define PKT_2_INT_MASK_N			0x50a4
+#define PKTDEC_AVIIF_RCV_IRQ			BIT(11)
+#define PKTDEC_ACR_RCV_MASK_N			BIT(3)
+#define PKT_2_INT_CLEAR				0x50a8
+#define PKTDEC_AVIIF_RCV_CLEAR			BIT(11)
+#define PKTDEC_ACR_RCV_CLEAR			BIT(3)
+#define SCDC_INT_STATUS				0x50c0
+#define SCDC_INT_MASK_N				0x50c4
+#define SCDC_INT_CLEAR				0x50c8
+#define SCDCTMDSCCFG_CHG			BIT(2)
+
+#define CEC_INT_STATUS				0x5100
+#define CEC_INT_MASK_N				0x5104
+#define CEC_INT_CLEAR				0x5108
+
+#endif
diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.c b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.c
new file mode 100644
index 000000000000..8554cbc4ccde
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd.
+ *
+ * Author: Shunqing Chen <csq@rock-chips.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <media/cec.h>
+#include <media/cec-notifier.h>
+
+#include "snps_hdmirx.h"
+#include "snps_hdmirx_cec.h"
+
+static void hdmirx_cec_write(struct hdmirx_cec *cec, int reg, u32 val)
+{
+	cec->ops->write(cec->hdmirx, reg, val);
+}
+
+static u32 hdmirx_cec_read(struct hdmirx_cec *cec, int reg)
+{
+	return cec->ops->read(cec->hdmirx, reg);
+}
+
+static void hdmirx_cec_update_bits(struct hdmirx_cec *cec, int reg, u32 mask,
+				   u32 data)
+{
+	u32 val = hdmirx_cec_read(cec, reg) & ~mask;
+
+	val |= (data & mask);
+	hdmirx_cec_write(cec, reg, val);
+}
+
+static int hdmirx_cec_log_addr(struct cec_adapter *adap, u8 logical_addr)
+{
+	struct hdmirx_cec *cec = cec_get_drvdata(adap);
+
+	if (logical_addr == CEC_LOG_ADDR_INVALID)
+		cec->addresses = 0;
+	else
+		cec->addresses |= BIT(logical_addr) | BIT(15);
+
+	hdmirx_cec_write(cec, CEC_ADDR, cec->addresses);
+
+	return 0;
+}
+
+static int hdmirx_cec_transmit(struct cec_adapter *adap, u8 attempts,
+			       u32 signal_free_time, struct cec_msg *msg)
+{
+	struct hdmirx_cec *cec = cec_get_drvdata(adap);
+	u32 data[4] = {0};
+	int i, data_len, msg_len;
+
+	msg_len = msg->len;
+	if (msg->len > 16)
+		msg_len = 16;
+	if (msg_len <= 0)
+		return 0;
+
+	hdmirx_cec_write(cec, CEC_TX_COUNT, msg_len - 1);
+	for (i = 0; i < msg_len; i++)
+		data[i / 4] |= msg->msg[i] << (i % 4) * 8;
+
+	data_len = msg_len / 4 + 1;
+	for (i = 0; i < data_len; i++)
+		hdmirx_cec_write(cec, CEC_TX_DATA3_0 + i * 4, data[i]);
+
+	hdmirx_cec_write(cec, CEC_TX_CONTROL, 0x1);
+
+	return 0;
+}
+
+static irqreturn_t hdmirx_cec_hardirq(int irq, void *data)
+{
+	struct cec_adapter *adap = data;
+	struct hdmirx_cec *cec = cec_get_drvdata(adap);
+	u32 stat = hdmirx_cec_read(cec, CEC_INT_STATUS);
+	irqreturn_t ret = IRQ_HANDLED;
+	u32 val;
+
+	if (!stat)
+		return IRQ_NONE;
+
+	hdmirx_cec_write(cec, CEC_INT_CLEAR, stat);
+
+	if (stat & CECTX_LINE_ERR) {
+		cec->tx_status = CEC_TX_STATUS_ERROR;
+		cec->tx_done = true;
+		ret = IRQ_WAKE_THREAD;
+	} else if (stat & CECTX_DONE) {
+		cec->tx_status = CEC_TX_STATUS_OK;
+		cec->tx_done = true;
+		ret = IRQ_WAKE_THREAD;
+	} else if (stat & CECTX_NACK) {
+		cec->tx_status = CEC_TX_STATUS_NACK;
+		cec->tx_done = true;
+		ret = IRQ_WAKE_THREAD;
+	}
+
+	if (stat & CECRX_EOM) {
+		unsigned int len, i;
+
+		val = hdmirx_cec_read(cec, CEC_RX_COUNT_STATUS);
+		/* rxbuffer locked status */
+		if ((val & 0x80))
+			return ret;
+
+		len = (val & 0xf) + 1;
+		if (len > sizeof(cec->rx_msg.msg))
+			len = sizeof(cec->rx_msg.msg);
+
+		for (i = 0; i < len; i++) {
+			if (!(i % 4))
+				val = hdmirx_cec_read(cec, CEC_RX_DATA3_0 + i / 4 * 4);
+			cec->rx_msg.msg[i] = (val >> ((i % 4) * 8)) & 0xff;
+		}
+
+		cec->rx_msg.len = len;
+		smp_wmb(); /* receive RX msg */
+		cec->rx_done = true;
+		hdmirx_cec_write(cec, CEC_LOCK_CONTROL, 0x1);
+
+		ret = IRQ_WAKE_THREAD;
+	}
+
+	return ret;
+}
+
+static irqreturn_t hdmirx_cec_thread(int irq, void *data)
+{
+	struct cec_adapter *adap = data;
+	struct hdmirx_cec *cec = cec_get_drvdata(adap);
+
+	if (cec->tx_done) {
+		cec->tx_done = false;
+		cec_transmit_attempt_done(adap, cec->tx_status);
+	}
+	if (cec->rx_done) {
+		cec->rx_done = false;
+		smp_rmb(); /* RX msg has been received */
+		cec_received_msg(adap, &cec->rx_msg);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int hdmirx_cec_enable(struct cec_adapter *adap, bool enable)
+{
+	struct hdmirx_cec *cec = cec_get_drvdata(adap);
+
+	if (!enable) {
+		hdmirx_cec_write(cec, CEC_INT_MASK_N, 0);
+		hdmirx_cec_write(cec, CEC_INT_CLEAR, 0);
+		if (cec->ops->disable)
+			cec->ops->disable(cec->hdmirx);
+	} else {
+		unsigned int irqs;
+
+		hdmirx_cec_log_addr(cec->adap, CEC_LOG_ADDR_INVALID);
+		if (cec->ops->enable)
+			cec->ops->enable(cec->hdmirx);
+		hdmirx_cec_update_bits(cec, GLOBAL_SWENABLE, CEC_ENABLE, CEC_ENABLE);
+
+		irqs = CECTX_LINE_ERR | CECTX_NACK | CECRX_EOM | CECTX_DONE;
+		hdmirx_cec_write(cec, CEC_INT_MASK_N, irqs);
+	}
+
+	return 0;
+}
+
+static const struct cec_adap_ops hdmirx_cec_ops = {
+	.adap_enable = hdmirx_cec_enable,
+	.adap_log_addr = hdmirx_cec_log_addr,
+	.adap_transmit = hdmirx_cec_transmit,
+};
+
+static void hdmirx_cec_del(void *data)
+{
+	struct hdmirx_cec *cec = data;
+
+	cec_delete_adapter(cec->adap);
+}
+
+struct hdmirx_cec *snps_hdmirx_cec_register(struct hdmirx_cec_data *data)
+{
+	struct hdmirx_cec *cec;
+	unsigned int irqs;
+	int ret;
+
+	if (!data)
+		return NULL;
+
+	/*
+	 * Our device is just a convenience - we want to link to the real
+	 * hardware device here, so that userspace can see the association
+	 * between the HDMI hardware and its associated CEC chardev.
+	 */
+	cec = devm_kzalloc(data->dev, sizeof(*cec), GFP_KERNEL);
+	if (!cec)
+		return NULL;
+
+	cec->dev = data->dev;
+	cec->irq = data->irq;
+	cec->ops = data->ops;
+	cec->hdmirx = data->hdmirx;
+	cec->edid = (struct edid *)data->edid;
+
+	hdmirx_cec_update_bits(cec, GLOBAL_SWENABLE, CEC_ENABLE, CEC_ENABLE);
+	hdmirx_cec_update_bits(cec, CEC_CONFIG, RX_AUTO_DRIVE_ACKNOWLEDGE,
+			       RX_AUTO_DRIVE_ACKNOWLEDGE);
+
+	hdmirx_cec_write(cec, CEC_TX_COUNT, 0);
+	hdmirx_cec_write(cec, CEC_INT_MASK_N, 0);
+	hdmirx_cec_write(cec, CEC_INT_CLEAR, ~0);
+
+	cec->adap = cec_allocate_adapter(&hdmirx_cec_ops, cec, "rk-hdmirx",
+					 CEC_CAP_LOG_ADDRS | CEC_CAP_TRANSMIT |
+					 CEC_CAP_RC | CEC_CAP_PASSTHROUGH,
+					 CEC_MAX_LOG_ADDRS);
+	if (IS_ERR(cec->adap)) {
+		dev_err(cec->dev, "cec adap allocate failed\n");
+		return NULL;
+	}
+
+	/* override the module pointer */
+	cec->adap->owner = THIS_MODULE;
+
+	ret = devm_add_action(cec->dev, hdmirx_cec_del, cec);
+	if (ret) {
+		cec_delete_adapter(cec->adap);
+		return NULL;
+	}
+
+	irq_set_status_flags(cec->irq, IRQ_NOAUTOEN);
+
+	ret = devm_request_threaded_irq(cec->dev, cec->irq,
+					hdmirx_cec_hardirq,
+					hdmirx_cec_thread, IRQF_ONESHOT,
+					"rk_hdmirx_cec", cec->adap);
+	if (ret) {
+		dev_err(cec->dev, "cec irq request failed\n");
+		return NULL;
+	}
+
+	cec->notify = cec_notifier_cec_adap_register(cec->dev,
+						     NULL, cec->adap);
+	if (!cec->notify) {
+		dev_err(cec->dev, "cec notify register failed\n");
+		return NULL;
+	}
+
+	ret = cec_register_adapter(cec->adap, cec->dev);
+	if (ret < 0) {
+		dev_err(cec->dev, "cec register adapter failed\n");
+		cec_unregister_adapter(cec->adap);
+		return NULL;
+	}
+
+	cec_s_phys_addr_from_edid(cec->adap, cec->edid);
+
+	irqs = CECTX_LINE_ERR | CECTX_NACK | CECRX_EOM | CECTX_DONE;
+	hdmirx_cec_write(cec, CEC_INT_MASK_N, irqs);
+
+	/*
+	 * CEC documentation says we must not call cec_delete_adapter
+	 * after a successful call to cec_register_adapter().
+	 */
+	devm_remove_action(cec->dev, hdmirx_cec_del, cec);
+
+	enable_irq(cec->irq);
+
+	return cec;
+}
+
+void snps_hdmirx_cec_unregister(struct hdmirx_cec *cec)
+{
+	if (!cec)
+		return;
+
+	disable_irq(cec->irq);
+
+	cec_unregister_adapter(cec->adap);
+}
diff --git a/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.h b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.h
new file mode 100644
index 000000000000..ae43f74d471d
--- /dev/null
+++ b/drivers/media/platform/synopsys/hdmirx/snps_hdmirx_cec.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021 Rockchip Electronics Co. Ltd.
+ *
+ * Author: Shunqing Chen <csq@rock-chips.com>
+ */
+
+#ifndef DW_HDMI_RX_CEC_H
+#define DW_HDMI_RX_CEC_H
+
+struct snps_hdmirx_dev;
+
+struct hdmirx_cec_ops {
+	void (*write)(struct snps_hdmirx_dev *hdmirx_dev, int reg, u32 val);
+	u32 (*read)(struct snps_hdmirx_dev *hdmirx_dev, int reg);
+	void (*enable)(struct snps_hdmirx_dev *hdmirx);
+	void (*disable)(struct snps_hdmirx_dev *hdmirx);
+};
+
+struct hdmirx_cec_data {
+	struct snps_hdmirx_dev *hdmirx;
+	const struct hdmirx_cec_ops *ops;
+	struct device *dev;
+	int irq;
+	u8 *edid;
+};
+
+struct hdmirx_cec {
+	struct snps_hdmirx_dev *hdmirx;
+	struct device *dev;
+	const struct hdmirx_cec_ops *ops;
+	u32 addresses;
+	struct cec_adapter *adap;
+	struct cec_msg rx_msg;
+	unsigned int tx_status;
+	bool tx_done;
+	bool rx_done;
+	struct cec_notifier *notify;
+	int irq;
+	struct edid *edid;
+};
+
+struct hdmirx_cec *snps_hdmirx_cec_register(struct hdmirx_cec_data *data);
+void snps_hdmirx_cec_unregister(struct hdmirx_cec *cec);
+
+#endif /* DW_HDMI_RX_CEC_H */
--
2.42.0


From 7e4c878d8e2fb2c45e9183968768ca875ead4644 Mon Sep 17 00:00:00 2001
From: Michal Tomek <mtdev79b@gmail.com>
Date: Wed, 20 Mar 2024 21:58:54 +0100
Subject: [PATCH 67/69] phy: rockchip-snps-pcie3: fix bifurcation on rk3588

So far all RK3588 boards use fully aggregated PCIe. CM3588 is one
of the few boards using this feature and apparently it is broken.

The PHY offers the following mapping options:

  port 0 lane 0 - always mapped to controller 0 (4L)
  port 0 lane 1 - to controller 0 or 2 (1L0)
  port 1 lane 0 - to controller 0 or 1 (2L)
  port 1 lane 1 - to controller 0, 1 or 3 (1L1)

The data-lanes DT property maps these as follows:

  0 = no controller (unsupported by the HW)
  1 = 4L
  2 = 2L
  3 = 1L0
  4 = 1L1

That allows the following configurations with first column being the mainline
data-lane mapping, second column being the downstream name, third column
being PCIE3PHY_GRF_CMN_CON0 and PHP_GRF_PCIESEL register values and final
column being the user visible lane setup:

  <1 1 1 1> = AGGREG = [4 0] = x4 (aggregation; used by most boards)
  <1 1 2 2> = NANBNB = [0 0] = x2 x2 (no bif.)
  <1 3 2 2> = NANBBI = [1 1] = x2 x1x1 (bif. of port 0)
  <1 1 2 4> = NABINB = [2 2] = x1x1 x2 (bif. of port 1)
  <1 3 2 4> = NABIBI = [3 3] = x1x1 x1x1 (bif. of both ports; used by CM3588)

The driver currently does not program PHP_GRF_PCIESEL correctly, which is
fixed by this patch. As a side-effect the new logic is much simpler than
the old logic.

Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3")
Signed-off-by: Michal Tomek <mtdev79b@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../phy/rockchip/phy-rockchip-snps-pcie3.c    | 24 +++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
index 121e5961ce11..d5bcc9c42b28 100644
--- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
+++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
@@ -132,7 +132,7 @@ static const struct rockchip_p3phy_ops rk3568_ops = {
 static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)
 {
 	u32 reg = 0;
-	u8 mode = 0;
+	u8 mode = RK3588_LANE_AGGREGATION; /* default */
 	int ret;

 	/* Deassert PCIe PMA output clamp mode */
@@ -140,28 +140,20 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)

 	/* Set bifurcation if needed */
 	for (int i = 0; i < priv->num_lanes; i++) {
-		if (!priv->lanes[i])
-			mode |= (BIT(i) << 3);
-
 		if (priv->lanes[i] > 1)
-			mode |= (BIT(i) >> 1);
-	}
-
-	if (!mode)
-		reg = RK3588_LANE_AGGREGATION;
-	else {
-		if (mode & (BIT(0) | BIT(1)))
-			reg |= RK3588_BIFURCATION_LANE_0_1;
-
-		if (mode & (BIT(2) | BIT(3)))
-			reg |= RK3588_BIFURCATION_LANE_2_3;
+			mode &= ~RK3588_LANE_AGGREGATION;
+		if (priv->lanes[i] == 3)
+			mode |= RK3588_BIFURCATION_LANE_0_1;
+		if (priv->lanes[i] == 4)
+			mode |= RK3588_BIFURCATION_LANE_2_3;
 	}

+	reg = mode;
 	regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg);

 	/* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */
 	if (!IS_ERR(priv->pipe_grf)) {
-		reg = (mode & (BIT(6) | BIT(7))) >> 6;
+		reg = mode & 3;
 		if (reg)
 			regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON,
 				     (reg << 16) | reg);
--
2.42.0


From 120b5c71bd8d613ba9e58c297b3132fe906d7fdc Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Thu, 21 Mar 2024 15:09:28 +0100
Subject: [PATCH 68/69] phy: rockchip: naneng-combphy: Fix mux on rk3588

The pcie1l0_sel and pcie1l1_sel bits in PCIESEL_CON configure the
mux for PCIe1L0 and PCIe1L1 to either the PIPE Combo PHYs or the
PCIe3 PHY. Thus this configuration interfers with the data-lanes
configuration done by the PCIe3 PHY.

RK3588 has three Combo PHYs. The first one has a dedicated PCIe
controller and is not affected by this. For the other two Combo
PHYs, there is one mux for each of them.

pcie1l0_sel selects if PCIe 1L0 is muxed to Combo PHY 1 when
bit is set to 0 or to the PCIe3 PHY when bit is set to 1.

pcie1l1_sel selects if PCIe 1L1 is muxed to Combo PHY 2 when
bit is set to 0 or to the PCIe3 PHY when bit is set to 1.

Currently the code always muxes 1L0 and 1L1 to the Combi PHYs
once one of them is being used in PCIe mode. This is obviously
wrong when at least one of the ports should be muxed to the
PCIe3 PHY.

Fix this by introducing Combo PHY identification and then only
setting up the required bit.

Fixes: a03c44277253 ("phy: rockchip: Add naneng combo phy support for RK3588")
Reported-by: Michal Tomek <mtdev79b@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../rockchip/phy-rockchip-naneng-combphy.c    | 36 +++++++++++++++++--
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
index 76b9cf417591..bf74e429ff46 100644
--- a/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
+++ b/drivers/phy/rockchip/phy-rockchip-naneng-combphy.c
@@ -125,12 +125,15 @@ struct rockchip_combphy_grfcfg {
 };

 struct rockchip_combphy_cfg {
+	unsigned int num_phys;
+	unsigned int phy_ids[3];
 	const struct rockchip_combphy_grfcfg *grfcfg;
 	int (*combphy_cfg)(struct rockchip_combphy_priv *priv);
 };

 struct rockchip_combphy_priv {
 	u8 type;
+	int id;
 	void __iomem *mmio;
 	int num_clks;
 	struct clk_bulk_data *clks;
@@ -320,7 +323,7 @@ static int rockchip_combphy_probe(struct platform_device *pdev)
 	struct rockchip_combphy_priv *priv;
 	const struct rockchip_combphy_cfg *phy_cfg;
 	struct resource *res;
-	int ret;
+	int ret, id;

 	phy_cfg = of_device_get_match_data(dev);
 	if (!phy_cfg) {
@@ -338,6 +341,15 @@ static int rockchip_combphy_probe(struct platform_device *pdev)
 		return ret;
 	}

+	/* find the phy-id from the io address */
+	priv->id = -ENODEV;
+	for (id = 0; id < phy_cfg->num_phys; id++) {
+		if (res->start == phy_cfg->phy_ids[id]) {
+			priv->id = id;
+			break;
+		}
+	}
+
 	priv->dev = dev;
 	priv->type = PHY_NONE;
 	priv->cfg = phy_cfg;
@@ -562,6 +574,12 @@ static const struct rockchip_combphy_grfcfg rk3568_combphy_grfcfgs = {
 };

 static const struct rockchip_combphy_cfg rk3568_combphy_cfgs = {
+	.num_phys = 3,
+	.phy_ids = {
+		0xfe820000,
+		0xfe830000,
+		0xfe840000,
+	},
 	.grfcfg		= &rk3568_combphy_grfcfgs,
 	.combphy_cfg	= rk3568_combphy_cfg,
 };
@@ -578,8 +596,14 @@ static int rk3588_combphy_cfg(struct rockchip_combphy_priv *priv)
 		rockchip_combphy_param_write(priv->phy_grf, &cfg->con1_for_pcie, true);
 		rockchip_combphy_param_write(priv->phy_grf, &cfg->con2_for_pcie, true);
 		rockchip_combphy_param_write(priv->phy_grf, &cfg->con3_for_pcie, true);
-		rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
-		rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+		switch (priv->id) {
+		case 1:
+			rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l0_sel, true);
+			break;
+		case 2:
+			rockchip_combphy_param_write(priv->pipe_grf, &cfg->pipe_pcie1l1_sel, true);
+			break;
+		}
 		break;
 	case PHY_TYPE_USB3:
 		/* Set SSC downward spread spectrum */
@@ -736,6 +760,12 @@ static const struct rockchip_combphy_grfcfg rk3588_combphy_grfcfgs = {
 };

 static const struct rockchip_combphy_cfg rk3588_combphy_cfgs = {
+	.num_phys = 3,
+	.phy_ids = {
+		0xfee00000,
+		0xfee10000,
+		0xfee20000,
+	},
 	.grfcfg		= &rk3588_combphy_grfcfgs,
 	.combphy_cfg	= rk3588_combphy_cfg,
 };
--
2.42.0


From 99fc9cef1176e5290f88c1f55589174022a638cd Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Tue, 26 Mar 2024 18:16:50 +0100
Subject: [PATCH 69/69] phy: rockchip-snps-pcie3: fix clearing
 PHP_GRF_PCIESEL_CON bits

Currently the PCIe v3 PHY driver only sets the pcie1ln_sel bits, but
does not clear them because of an incorrect write mask. This fixes up
the issue by using a newly introduced constant for the write mask.

While at it it also introduces a proper GENMASK based constant for
the PCIE30_PHY_MODE.

Fixes: 2e9bffc4f713 ("phy: rockchip: Support PCIe v3")
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/phy/rockchip/phy-rockchip-snps-pcie3.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
index d5bcc9c42b28..9857ee45b89e 100644
--- a/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
+++ b/drivers/phy/rockchip/phy-rockchip-snps-pcie3.c
@@ -40,6 +40,8 @@
 #define RK3588_BIFURCATION_LANE_0_1		BIT(0)
 #define RK3588_BIFURCATION_LANE_2_3		BIT(1)
 #define RK3588_LANE_AGGREGATION		BIT(2)
+#define RK3588_PCIE1LN_SEL_EN			(GENMASK(1, 0) << 16)
+#define RK3588_PCIE30_PHY_MODE_EN		(GENMASK(2, 0) << 16)

 struct rockchip_p3phy_ops;

@@ -149,14 +151,15 @@ static int rockchip_p3phy_rk3588_init(struct rockchip_p3phy_priv *priv)
 	}

 	reg = mode;
-	regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0, (0x7<<16) | reg);
+	regmap_write(priv->phy_grf, RK3588_PCIE3PHY_GRF_CMN_CON0,
+		     RK3588_PCIE30_PHY_MODE_EN | reg);

 	/* Set pcie1ln_sel in PHP_GRF_PCIESEL_CON */
 	if (!IS_ERR(priv->pipe_grf)) {
-		reg = mode & 3;
+		reg = mode & (RK3588_BIFURCATION_LANE_0_1 | RK3588_BIFURCATION_LANE_2_3);
 		if (reg)
 			regmap_write(priv->pipe_grf, PHP_GRF_PCIESEL_CON,
-				     (reg << 16) | reg);
+				     RK3588_PCIE1LN_SEL_EN | reg);
 	}

 	reset_control_deassert(priv->p30phy);
--
2.42.0