diff --git a/shared/platform/orangepi5plus.nix b/shared/platform/orangepi5plus.nix index 15a3621..0008a95 100644 --- a/shared/platform/orangepi5plus.nix +++ b/shared/platform/orangepi5plus.nix @@ -9,12 +9,12 @@ in { boot = { kernelPackages = crossPkgs.linuxPackagesFor (crossPkgs.buildLinux rec { - version = "6.8.0-rc1"; - modDirVersion = "6.8.0-rc1"; + version = "6.8.0-rc7"; + modDirVersion = "6.8.0-rc7"; src = builtins.fetchTarball { - url = "https://git.kernel.org/torvalds/t/linux-6.8-rc1.tar.gz"; - sha256 = "0rnrd1iy73vkrablx6rqlmxv9bv9zjfh6zj09aqca9rr5h8iz1p3"; + url = "https://git.kernel.org/torvalds/t/linux-6.8-rc7.tar.gz"; + sha256 = "sha256:0q9isgv6lxzrmb4idl0spxv2l7fsk3nn4cdq0vdw9c8lyzrh5yy0"; }; kernelPatches = [ { diff --git a/shared/platform/orangepi5plus/rk3588-v6.8.0-rc1.patch b/shared/platform/orangepi5plus/rk3588-v6.8.0-rc7.patch similarity index 90% rename from shared/platform/orangepi5plus/rk3588-v6.8.0-rc1.patch rename to shared/platform/orangepi5plus/rk3588-v6.8.0-rc7.patch index c09caea..e530453 100644 --- a/shared/platform/orangepi5plus/rk3588-v6.8.0-rc1.patch +++ b/shared/platform/orangepi5plus/rk3588-v6.8.0-rc7.patch @@ -1,7 +1,7 @@ -From 81d7accbd06c1d8fcf0ebb6f7f9a58fb87c8b86f Mon Sep 17 00:00:00 2001 +From de838f54392f9a8148ec6cc64697f9e6eea98bbd Mon Sep 17 00:00:00 2001 From: Christopher Obbard Date: Mon, 20 Feb 2023 16:59:04 +0000 -Subject: [PATCH 01/81] [NOUPSTREAM] Add GitLab CI support +Subject: [PATCH 01/71] [NOUPSTREAM] Add GitLab CI support Build a Kernel .deb package in GitLab CI and run a basic LAVA boot test using Debian bookworm. @@ -224,10 +224,10 @@ index 000000000000..8dfaf772296c 2.42.0 -From f679916fdf19edb563993e2367af239d1c5c3d2b Mon Sep 17 00:00:00 2001 +From 0c3a9245d09cf855e7641f9d0d10b888a2b79325 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 25 Jan 2024 19:20:49 +0100 -Subject: [PATCH 02/81] [NOUPSTREAM] Add Mali FW to CI pipeline +Subject: [PATCH 02/71] [NOUPSTREAM] Add Mali FW to CI pipeline Provide the Mali firmware, so that Panthor can probe successfully. @@ -262,10 +262,16789 @@ index 37e570c6e3b2..7f89af4d1ab5 100644 2.42.0 -From 926d91cd90f505483cc9ad4a51e5ae50f7cffbf7 Mon Sep 17 00:00:00 2001 +From aacba653590bee1286309f5b3fa4befb536cbbd5 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:15 +0100 +Subject: [PATCH 03/71] [MERGED] drm/panthor: Add uAPI + +Panthor follows the lead of other recently submitted drivers with +ioctls allowing us to support modern Vulkan features, like sparse memory +binding: + +- Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET), + with the 'exclusive-VM' bit to speed-up BO reservation on job submission +- VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND + ioctl is loosely based on the Xe model, and can handle both + asynchronous and synchronous requests +- GPU execution context creation/destruction, tiler heap context creation + and job submission. Those ioctls reflect how the hardware/scheduler + works and are thus driver specific. + +We also have a way to expose IO regions, such that the usermode driver +can directly access specific/well-isolate registers, like the +LATEST_FLUSH register used to implement cache-flush reduction. + +This uAPI intentionally keeps usermode queues out of the scope, which +explains why doorbell registers and command stream ring-buffers are not +directly exposed to userspace. 
+ +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix typo +- Add Liviu's R-b + +v4: +- Add a VM_GET_STATE ioctl +- Fix doc +- Expose the CORE_FEATURES register so we can deal with variants in the + UMD +- Add Steve's R-b + +v3: +- Add the concept of sync-only VM operation +- Fix support for 32-bit userspace +- Rework drm_panthor_vm_create to pass the user VA size instead of + the kernel VA size (suggested by Robin Murphy) +- Typo fixes +- Explicitly cast enums with top bit set to avoid compiler warnings in + -pedantic mode. +- Drop property core_group_count as it can be easily calculated by the + number of bits set in l2_present. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-2-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + Documentation/gpu/driver-uapi.rst | 5 + + include/uapi/drm/panthor_drm.h | 945 ++++++++++++++++++++++++++++++ + 2 files changed, 950 insertions(+) + create mode 100644 include/uapi/drm/panthor_drm.h + +diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst +index e5070a0e95ab..971cdb4816fc 100644 +--- a/Documentation/gpu/driver-uapi.rst ++++ b/Documentation/gpu/driver-uapi.rst +@@ -18,6 +18,11 @@ VM_BIND / EXEC uAPI + + .. kernel-doc:: include/uapi/drm/nouveau_drm.h + ++drm/panthor uAPI ++================ ++ ++.. kernel-doc:: include/uapi/drm/panthor_drm.h ++ + drm/xe uAPI + =========== + +diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h +new file mode 100644 +index 000000000000..373df80f41ed +--- /dev/null ++++ b/include/uapi/drm/panthor_drm.h +@@ -0,0 +1,945 @@ ++/* SPDX-License-Identifier: MIT */ ++/* Copyright (C) 2023 Collabora ltd. */ ++#ifndef _PANTHOR_DRM_H_ ++#define _PANTHOR_DRM_H_ ++ ++#include "drm.h" ++ ++#if defined(__cplusplus) ++extern "C" { ++#endif ++ ++/** ++ * DOC: Introduction ++ * ++ * This documentation describes the Panthor IOCTLs. ++ * ++ * Just a few generic rules about the data passed to the Panthor IOCTLs: ++ * ++ * - Structures must be aligned on 64-bit/8-byte. If the object is not ++ * naturally aligned, a padding field must be added. ++ * - Fields must be explicitly aligned to their natural type alignment with ++ * pad[0..N] fields. ++ * - All padding fields will be checked by the driver to make sure they are ++ * zeroed. ++ * - Flags can be added, but not removed/replaced. ++ * - New fields can be added to the main structures (the structures ++ * directly passed to the ioctl). Those fields can be added at the end of ++ * the structure, or replace existing padding fields. Any new field being ++ * added must preserve the behavior that existed before those fields were ++ * added when a value of zero is passed. ++ * - New fields can be added to indirect objects (objects pointed by the ++ * main structure), iff those objects are passed a size to reflect the ++ * size known by the userspace driver (see drm_panthor_obj_array::stride ++ * or drm_panthor_dev_query::size). ++ * - If the kernel driver is too old to know some fields, those will be ++ * ignored if zero, and otherwise rejected (and so will be zero on output). ++ * - If userspace is too old to know some fields, those will be zeroed ++ * (input) before the structure is parsed by the kernel driver. 
++ * - Each new flag/field addition must come with a driver version update so ++ * the userspace driver doesn't have to trial and error to know which ++ * flags are supported. ++ * - Structures should not contain unions, as this would defeat the ++ * extensibility of such structures. ++ * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed ++ * at the end of the drm_panthor_ioctl_id enum. ++ */ ++ ++/** ++ * DOC: MMIO regions exposed to userspace. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET ++ * ++ * File offset for all MMIO regions being exposed to userspace. Don't use ++ * this value directly, use DRM_PANTHOR_USER__OFFSET values instead. ++ * pgoffset passed to mmap2() is an unsigned long, which forces us to use a ++ * different offset on 32-bit and 64-bit systems. ++ * ++ * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET ++ * ++ * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls ++ * GPU cache flushing through CS instructions, but the flush reduction ++ * mechanism requires a flush_id. This flush_id could be queried with an ++ * ioctl, but Arm provides a well-isolated register page containing only this ++ * read-only register, so let's expose this page through a static mmap offset ++ * and allow direct mapping of this MMIO region so we can avoid the ++ * user <-> kernel round-trip. ++ */ ++#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) ++#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) ++#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ ++ DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) ++#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) ++ ++/** ++ * DOC: IOCTL IDs ++ * ++ * enum drm_panthor_ioctl_id - IOCTL IDs ++ * ++ * Place new ioctls at the end, don't re-order, don't replace or remove entries. ++ * ++ * These IDs are not meant to be used directly. Use the DRM_IOCTL_PANTHOR_xxx ++ * definitions instead. ++ */ ++enum drm_panthor_ioctl_id { ++ /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ ++ DRM_PANTHOR_DEV_QUERY = 0, ++ ++ /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ ++ DRM_PANTHOR_VM_CREATE, ++ ++ /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ ++ DRM_PANTHOR_VM_DESTROY, ++ ++ /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ ++ DRM_PANTHOR_VM_BIND, ++ ++ /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ ++ DRM_PANTHOR_VM_GET_STATE, ++ ++ /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ ++ DRM_PANTHOR_BO_CREATE, ++ ++ /** ++ * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to ++ * mmap to map a GEM object. ++ */ ++ DRM_PANTHOR_BO_MMAP_OFFSET, ++ ++ /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ ++ DRM_PANTHOR_GROUP_CREATE, ++ ++ /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ ++ DRM_PANTHOR_GROUP_DESTROY, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging ++ * to a specific scheduling group. ++ */ ++ DRM_PANTHOR_GROUP_SUBMIT, ++ ++ /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ ++ DRM_PANTHOR_GROUP_GET_STATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_CREATE, ++ ++ /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ ++ DRM_PANTHOR_TILER_HEAP_DESTROY, ++}; ++ ++/** ++ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number ++ * @__access: Access type. Must be R, W or RW. ++ * @__id: One of the DRM_PANTHOR_xxx id. ++ * @__type: Suffix of the type being passed to the IOCTL. 
++ * ++ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx ++ * values instead. ++ * ++ * Return: An IOCTL number to be passed to ioctl() from userspace. ++ */ ++#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ ++ DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ ++ struct drm_panthor_ ## __type) ++ ++#define DRM_IOCTL_PANTHOR_DEV_QUERY \ ++ DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) ++#define DRM_IOCTL_PANTHOR_VM_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) ++#define DRM_IOCTL_PANTHOR_VM_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) ++#define DRM_IOCTL_PANTHOR_VM_BIND \ ++ DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) ++#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) ++#define DRM_IOCTL_PANTHOR_BO_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) ++#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ ++ DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) ++#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) ++#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) ++#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) ++#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ ++ DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) ++#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ ++ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) ++ ++/** ++ * DOC: IOCTL arguments ++ */ ++ ++/** ++ * struct drm_panthor_obj_array - Object array. ++ * ++ * This object is used to pass an array of objects whose size is subject to changes in ++ * future versions of the driver. In order to support this mutability, we pass a stride ++ * describing the size of the object as known by userspace. ++ * ++ * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use ++ * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to ++ * the object size. ++ */ ++struct drm_panthor_obj_array { ++ /** @stride: Stride of object struct. Used for versioning. */ ++ __u32 stride; ++ ++ /** @count: Number of objects in the array. */ ++ __u32 count; ++ ++ /** @array: User pointer to an array of objects. */ ++ __u64 array; ++}; ++ ++/** ++ * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. ++ * @cnt: Number of elements in the array. ++ * @ptr: Pointer to the array to pass to the kernel. ++ * ++ * Macro initializing a drm_panthor_obj_array based on the object size as known ++ * by userspace. ++ */ ++#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ ++ { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } ++ ++/** ++ * enum drm_panthor_sync_op_flags - Synchronization operation flags. ++ */ ++enum drm_panthor_sync_op_flags { ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, ++ ++ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, ++ ++ /** ++ * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization ++ * object type. ++ */ ++ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, ++ ++ /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ ++ DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, ++ ++ /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. 
*/ ++ DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), ++}; ++ ++/** ++ * struct drm_panthor_sync_op - Synchronization operation. ++ */ ++struct drm_panthor_sync_op { ++ /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */ ++ __u32 flags; ++ ++ /** @handle: Sync handle. */ ++ __u32 handle; ++ ++ /** ++ * @timeline_value: MBZ if ++ * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != ++ * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. ++ */ ++ __u64 timeline_value; ++}; ++ ++/** ++ * enum drm_panthor_dev_query_type - Query type ++ * ++ * Place new types at the end, don't re-order, don't remove or replace. ++ */ ++enum drm_panthor_dev_query_type { ++ /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ ++ DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, ++ ++ /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ ++ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, ++}; ++ ++/** ++ * struct drm_panthor_gpu_info - GPU information ++ * ++ * Structure grouping all queryable information relating to the GPU. ++ */ ++struct drm_panthor_gpu_info { ++ /** @gpu_id : GPU ID. */ ++ __u32 gpu_id; ++#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) ++#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) ++#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) ++#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) ++#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) ++#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) ++ ++ /** @gpu_rev: GPU revision. */ ++ __u32 gpu_rev; ++ ++ /** @csf_id: Command stream frontend ID. */ ++ __u32 csf_id; ++#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) ++#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) ++#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) ++#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) ++#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) ++#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) ++ ++ /** @l2_features: L2-cache features. */ ++ __u32 l2_features; ++ ++ /** @tiler_features: Tiler features. */ ++ __u32 tiler_features; ++ ++ /** @mem_features: Memory features. */ ++ __u32 mem_features; ++ ++ /** @mmu_features: MMU features. */ ++ __u32 mmu_features; ++#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) ++ ++ /** @thread_features: Thread features. */ ++ __u32 thread_features; ++ ++ /** @max_threads: Maximum number of threads. */ ++ __u32 max_threads; ++ ++ /** @thread_max_workgroup_size: Maximum workgroup size. */ ++ __u32 thread_max_workgroup_size; ++ ++ /** ++ * @thread_max_barrier_size: Maximum number of threads that can wait ++ * simultaneously on a barrier. ++ */ ++ __u32 thread_max_barrier_size; ++ ++ /** @coherency_features: Coherency features. */ ++ __u32 coherency_features; ++ ++ /** @texture_features: Texture features. */ ++ __u32 texture_features[4]; ++ ++ /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ ++ __u32 as_present; ++ ++ /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ ++ __u64 shader_present; ++ ++ /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ ++ __u64 l2_present; ++ ++ /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ ++ __u64 tiler_present; ++ ++ /* @core_features: Used to discriminate core variants when they exist. */ ++ __u32 core_features; ++ ++ /* @pad: MBZ. 
*/ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_csif_info - Command stream interface information ++ * ++ * Structure grouping all queryable information relating to the command stream interface. ++ */ ++struct drm_panthor_csif_info { ++ /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ ++ __u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slots per group. */ ++ __u32 cs_slot_count; ++ ++ /** @cs_reg_count: Number of command stream registers. */ ++ __u32 cs_reg_count; ++ ++ /** @scoreboard_slot_count: Number of scoreboard slots. */ ++ __u32 scoreboard_slot_count; ++ ++ /** ++ * @unpreserved_cs_reg_count: Number of command stream registers reserved by ++ * the kernel driver to call a userspace command stream. ++ * ++ * All registers can be used by a userspace command stream, but the ++ * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are ++ * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. ++ */ ++ __u32 unpreserved_cs_reg_count; ++ ++ /** ++ * @pad: Padding field, set to zero. ++ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY ++ */ ++struct drm_panthor_dev_query { ++ /** @type: the query type (see drm_panthor_dev_query_type). */ ++ __u32 type; ++ ++ /** ++ * @size: size of the type being queried. ++ * ++ * If pointer is NULL, size is updated by the driver to provide the ++ * output structure size. If pointer is not NULL, the driver will ++ * only copy min(size, actual_structure_size) bytes to the pointer, ++ * and update the size accordingly. This allows us to extend query ++ * types without breaking userspace. ++ */ ++ __u32 size; ++ ++ /** ++ * @pointer: user pointer to a query type struct. ++ * ++ * Pointer can be NULL, in which case, nothing is copied, but the ++ * actual structure size is returned. If not NULL, it must point to ++ * a location that's large enough to hold size bytes. ++ */ ++ __u64 pointer; ++}; ++ ++/** ++ * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE ++ */ ++struct drm_panthor_vm_create { ++ /** @flags: VM flags, MBZ. */ ++ __u32 flags; ++ ++ /** @id: Returned VM ID. */ ++ __u32 id; ++ ++ /** ++ * @user_va_range: Size of the VA space reserved for user objects. ++ * ++ * The kernel will pick the remaining space to map kernel-only objects to the ++ * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, ++ * ...). If the space left for kernel objects is too small, kernel object ++ * allocation will fail further down the road. One can use ++ * drm_panthor_gpu_info::mmu_features to extract the total virtual address ++ * range, and chose a user_va_range that leaves some space to the kernel. ++ * ++ * If user_va_range is zero, the kernel will pick a sensible value based on ++ * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user ++ * split should leave enough VA space for userspace processes to support SVM, ++ * while still allowing the kernel to map some amount of kernel objects in ++ * the kernel VA range). The value chosen by the driver will be returned in ++ * @user_va_range. ++ * ++ * User VA space always starts at 0x0, kernel VA space is always placed after ++ * the user VA range. ++ */ ++ __u64 user_va_range; ++}; ++ ++/** ++ * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY ++ */ ++struct drm_panthor_vm_destroy { ++ /** @id: ID of the VM to destroy. */ ++ __u32 id; ++ ++ /** @pad: MBZ. 
*/ ++ __u32 pad; ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_op_flags - VM bind operation flags ++ */ ++enum drm_panthor_vm_bind_op_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. ++ * ++ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, ++ ++ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, ++ ++ /** ++ * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. ++ * ++ * Just serves as a synchronization point on a VM queue. ++ * ++ * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, ++ * and drm_panthor_vm_bind_op::syncs contains at least one element. ++ */ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind_op - VM bind operation ++ */ ++struct drm_panthor_vm_bind_op { ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ ++ __u32 flags; ++ ++ /** ++ * @bo_handle: Handle of the buffer object to map. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u32 bo_handle; ++ ++ /** ++ * @bo_offset: Buffer object offset. ++ * MBZ for unmap or sync-only operations. ++ */ ++ __u64 bo_offset; ++ ++ /** ++ * @va: Virtual address to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 va; ++ ++ /** ++ * @size: Size to map/unmap. ++ * MBZ for sync-only operations. ++ */ ++ __u64 size; ++ ++ /** ++ * @syncs: Array of struct drm_panthor_sync_op synchronization ++ * operations. ++ * ++ * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on ++ * the drm_panthor_vm_bind object containing this VM bind operation. ++ * ++ * This array shall not be empty for sync-only operations. ++ */ ++ struct drm_panthor_obj_array syncs; ++ ++}; ++ ++/** ++ * enum drm_panthor_vm_bind_flags - VM bind flags ++ */ ++enum drm_panthor_vm_bind_flags { ++ /** ++ * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM ++ * queue instead of being executed synchronously. ++ */ ++ DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, ++}; ++ ++/** ++ * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND ++ */ ++struct drm_panthor_vm_bind { ++ /** @vm_id: VM targeted by the bind request. */ ++ __u32 vm_id; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ ++ __u32 flags; ++ ++ /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ ++ struct drm_panthor_obj_array ops; ++}; ++ ++/** ++ * enum drm_panthor_vm_state - VM states. ++ */ ++enum drm_panthor_vm_state { ++ /** ++ * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. ++ * ++ * New VM operations will be accepted on this VM. ++ */ ++ DRM_PANTHOR_VM_STATE_USABLE, ++ ++ /** ++ * @DRM_PANTHOR_VM_STATE_UNUSABLE: VM is unusable. 
++ * ++ * Something put the VM in an unusable state (like an asynchronous ++ * VM_BIND request failing for any reason). ++ * ++ * Once the VM is in this state, all new MAP operations will be ++ * rejected, and any GPU job targeting this VM will fail. ++ * UNMAP operations are still accepted. ++ * ++ * The only way to recover from an unusable VM is to create a new ++ * VM, and destroy the old one. ++ */ ++ DRM_PANTHOR_VM_STATE_UNUSABLE, ++}; ++ ++/** ++ * struct drm_panthor_vm_get_state - Get VM state. ++ */ ++struct drm_panthor_vm_get_state { ++ /** @vm_id: VM targeted by the get_state request. */ ++ __u32 vm_id; ++ ++ /** ++ * @state: state returned by the driver. ++ * ++ * Must be one of the enum drm_panthor_vm_state values. ++ */ ++ __u32 state; ++}; ++ ++/** ++ * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. ++ */ ++enum drm_panthor_bo_flags { ++ /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ ++ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), ++}; ++ ++/** ++ * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. ++ */ ++struct drm_panthor_bo_create { ++ /** ++ * @size: Requested size for the object ++ * ++ * The (page-aligned) allocated size for the object will be returned. ++ */ ++ __u64 size; ++ ++ /** ++ * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. ++ */ ++ __u32 flags; ++ ++ /** ++ * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. ++ * ++ * If not zero, the field must refer to a valid VM ID, and implies that: ++ * - the buffer object will only ever be bound to that VM ++ * - cannot be exported as a PRIME fd ++ */ ++ __u32 exclusive_vm_id; ++ ++ /** ++ * @handle: Returned handle for the object. ++ * ++ * Object handles are nonzero. ++ */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. ++ */ ++struct drm_panthor_bo_mmap_offset { ++ /** @handle: Handle of the object we want an mmap offset for. */ ++ __u32 handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @offset: The fake offset to use for subsequent mmap calls. */ ++ __u64 offset; ++}; ++ ++/** ++ * struct drm_panthor_queue_create - Queue creation arguments. ++ */ ++struct drm_panthor_queue_create { ++ /** ++ * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, ++ * 15 being the highest priority. ++ */ ++ __u8 priority; ++ ++ /** @pad: Padding fields, MBZ. */ ++ __u8 pad[3]; ++ ++ /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ ++ __u32 ringbuf_size; ++}; ++ ++/** ++ * enum drm_panthor_group_priority - Scheduling group priority ++ */ ++enum drm_panthor_group_priority { ++ /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_GROUP_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_GROUP_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_GROUP_PRIORITY_HIGH, ++}; ++ ++/** ++ * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE ++ */ ++struct drm_panthor_group_create { ++ /** @queues: Array of drm_panthor_queue_create elements. */ ++ struct drm_panthor_obj_array queues; ++ ++ /** ++ * @max_compute_cores: Maximum number of cores that can be used by compute ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @compute_core_mask. 
++ */ ++ __u8 max_compute_cores; ++ ++ /** ++ * @max_fragment_cores: Maximum number of cores that can be used by fragment ++ * jobs across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @fragment_core_mask. ++ */ ++ __u8 max_fragment_cores; ++ ++ /** ++ * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs ++ * across CS queues bound to this group. ++ * ++ * Must be less or equal to the number of bits set in @tiler_core_mask. ++ */ ++ __u8 max_tiler_cores; ++ ++ /** @priority: Group priority (see enum drm_panthor_group_priority). */ ++ __u8 priority; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++ ++ /** ++ * @compute_core_mask: Mask encoding cores that can be used for compute jobs. ++ * ++ * This field must have at least @max_compute_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 compute_core_mask; ++ ++ /** ++ * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. ++ * ++ * This field must have at least @max_fragment_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. ++ */ ++ __u64 fragment_core_mask; ++ ++ /** ++ * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs. ++ * ++ * This field must have at least @max_tiler_cores bits set. ++ * ++ * The bits set here should also be set in drm_panthor_gpu_info::tiler_present. ++ */ ++ __u64 tiler_core_mask; ++ ++ /** ++ * @vm_id: VM ID to bind this group to. ++ * ++ * All submission to queues bound to this group will use this VM. ++ */ ++ __u32 vm_id; ++ ++ /** ++ * @group_handle: Returned group handle. Passed back when submitting jobs or ++ * destroying a group. ++ */ ++ __u32 group_handle; ++}; ++ ++/** ++ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY ++ */ ++struct drm_panthor_group_destroy { ++ /** @group_handle: Group to destroy */ ++ __u32 group_handle; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_queue_submit - Job submission arguments. ++ * ++ * This is describing the userspace command stream to call from the kernel ++ * command stream ring-buffer. Queue submission is always part of a group ++ * submission, taking one or more jobs to submit to the underlying queues. ++ */ ++struct drm_panthor_queue_submit { ++ /** @queue_index: Index of the queue inside a group. */ ++ __u32 queue_index; ++ ++ /** ++ * @stream_size: Size of the command stream to execute. ++ * ++ * Must be 64-bit/8-byte aligned (the size of a CS instruction) ++ * ++ * Can be zero if stream_addr is zero too. ++ */ ++ __u32 stream_size; ++ ++ /** ++ * @stream_addr: GPU address of the command stream to execute. ++ * ++ * Must be aligned on 64-byte. ++ * ++ * Can be zero is stream_size is zero too. ++ */ ++ __u64 stream_addr; ++ ++ /** ++ * @latest_flush: FLUSH_ID read at the time the stream was built. ++ * ++ * This allows cache flush elimination for the automatic ++ * flush+invalidate(all) done at submission time, which is needed to ++ * ensure the GPU doesn't get garbage when reading the indirect command ++ * stream buffers. If you want the cache flush to happen ++ * unconditionally, pass a zero here. ++ */ ++ __u32 latest_flush; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @syncs: Array of struct drm_panthor_sync_op sync operations. 
*/ ++ struct drm_panthor_obj_array syncs; ++}; ++ ++/** ++ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT ++ */ ++struct drm_panthor_group_submit { ++ /** @group_handle: Handle of the group to queue jobs to. */ ++ __u32 group_handle; ++ ++ /** @pad: MBZ. */ ++ __u32 pad; ++ ++ /** @queue_submits: Array of drm_panthor_queue_submit objects. */ ++ struct drm_panthor_obj_array queue_submits; ++}; ++ ++/** ++ * enum drm_panthor_group_state_flags - Group state flags ++ */ ++enum drm_panthor_group_state_flags { ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, ++ ++ /** ++ * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. ++ * ++ * When a group ends up with this flag set, no jobs can be submitted to its queues. ++ */ ++ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, ++}; ++ ++/** ++ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE ++ * ++ * Used to query the state of a group and decide whether a new group should be created to ++ * replace it. ++ */ ++struct drm_panthor_group_get_state { ++ /** @group_handle: Handle of the group to query state on */ ++ __u32 group_handle; ++ ++ /** ++ * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the ++ * group state. ++ */ ++ __u32 state; ++ ++ /** @fatal_queues: Bitmask of queues that faced fatal faults. */ ++ __u32 fatal_queues; ++ ++ /** @pad: MBZ */ ++ __u32 pad; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE ++ */ ++struct drm_panthor_tiler_heap_create { ++ /** @vm_id: VM ID the tiler heap should be mapped to */ ++ __u32 vm_id; ++ ++ /** @initial_chunk_count: Initial number of chunks to allocate. */ ++ __u32 initial_chunk_count; ++ ++ /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ ++ __u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks that can be allocated. */ ++ __u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * ++ * If the heap has more than tiler jobs in-flight, the FW will wait for render ++ * passes to finish before queuing new tiler jobs. ++ */ ++ __u32 target_in_flight; ++ ++ /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ ++ __u32 handle; ++ ++ /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */ ++ __u64 tiler_heap_ctx_gpu_va; ++ ++ /** ++ * @first_heap_chunk_gpu_va: First heap chunk. ++ * ++ * The tiler heap is formed of heap chunks forming a single-link list. This ++ * is the first element in the list. ++ */ ++ __u64 first_heap_chunk_gpu_va; ++}; ++ ++/** ++ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY ++ */ ++struct drm_panthor_tiler_heap_destroy { ++ /** @handle: Handle of the tiler heap to destroy */ ++ __u32 handle; ++ ++ /** @pad: Padding field, MBZ. */ ++ __u32 pad; ++}; ++ ++#if defined(__cplusplus) ++} ++#endif ++ ++#endif /* _PANTHOR_DRM_H_ */ +-- +2.42.0 + + +From e99edf79c73ded0dee32664cac346918c14e27d6 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:16 +0100 +Subject: [PATCH 04/71] [MERGED] drm/panthor: Add GPU register definitions + +Those are the registers directly accessible through the MMIO range. + +FW registers are exposed in panthor_fw.h. 
+ +v6: +- Add Maxime's and Heiko's acks + +v4: +- Add the CORE_FEATURES register (needed for GPU variants) +- Add Steve's R-b + +v3: +- Add macros to extract GPU ID info +- Formatting changes +- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF +- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT +- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-3-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++ + 1 file changed, 239 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_regs.h + +diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h +new file mode 100644 +index 000000000000..b7b3b3add166 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_regs.h +@@ -0,0 +1,239 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++/* ++ * Register definitions based on mali_kbase_gpu_regmap.h and ++ * mali_kbase_gpu_regmap_csf.h ++ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. ++ */ ++#ifndef __PANTHOR_REGS_H__ ++#define __PANTHOR_REGS_H__ ++ ++#define GPU_ID 0x0 ++#define GPU_ARCH_MAJOR(x) ((x) >> 28) ++#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24) ++#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20) ++#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16) ++#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12) ++#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4) ++#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0)) ++ ++#define GPU_L2_FEATURES 0x4 ++#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0))) ++ ++#define GPU_CORE_FEATURES 0x8 ++ ++#define GPU_TILER_FEATURES 0xC ++#define GPU_MEM_FEATURES 0x10 ++#define GROUPS_L2_COHERENT BIT(0) ++ ++#define GPU_MMU_FEATURES 0x14 ++#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0)) ++#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0)) ++#define GPU_AS_PRESENT 0x18 ++#define GPU_CSF_ID 0x1C ++ ++#define GPU_INT_RAWSTAT 0x20 ++#define GPU_INT_CLEAR 0x24 ++#define GPU_INT_MASK 0x28 ++#define GPU_INT_STAT 0x2c ++#define GPU_IRQ_FAULT BIT(0) ++#define GPU_IRQ_PROTM_FAULT BIT(1) ++#define GPU_IRQ_RESET_COMPLETED BIT(8) ++#define GPU_IRQ_POWER_CHANGED BIT(9) ++#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) ++#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) ++#define GPU_IRQ_DOORBELL_MIRROR BIT(18) ++#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19) ++#define GPU_CMD 0x30 ++#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8)) ++#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1) ++#define GPU_HARD_RESET GPU_CMD_DEF(1, 2) ++#define CACHE_CLEAN BIT(0) ++#define CACHE_INV BIT(1) ++#define GPU_FLUSH_CACHES(l2, lsc, oth) \ ++ GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8)) ++ ++#define GPU_STATUS 0x34 ++#define GPU_STATUS_ACTIVE BIT(0) ++#define GPU_STATUS_PWR_ACTIVE BIT(1) ++#define GPU_STATUS_PAGE_FAULT BIT(4) ++#define GPU_STATUS_PROTM_ACTIVE BIT(7) ++#define GPU_STATUS_DBG_ENABLED BIT(8) ++ ++#define GPU_FAULT_STATUS 0x3C ++#define 
GPU_FAULT_ADDR_LO 0x40 ++#define GPU_FAULT_ADDR_HI 0x44 ++ ++#define GPU_PWR_KEY 0x50 ++#define GPU_PWR_KEY_UNLOCK 0x2968A819 ++#define GPU_PWR_OVERRIDE0 0x54 ++#define GPU_PWR_OVERRIDE1 0x58 ++ ++#define GPU_TIMESTAMP_OFFSET_LO 0x88 ++#define GPU_TIMESTAMP_OFFSET_HI 0x8C ++#define GPU_CYCLE_COUNT_LO 0x90 ++#define GPU_CYCLE_COUNT_HI 0x94 ++#define GPU_TIMESTAMP_LO 0x98 ++#define GPU_TIMESTAMP_HI 0x9C ++ ++#define GPU_THREAD_MAX_THREADS 0xA0 ++#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4 ++#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8 ++#define GPU_THREAD_FEATURES 0xAC ++ ++#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4)) ++ ++#define GPU_SHADER_PRESENT_LO 0x100 ++#define GPU_SHADER_PRESENT_HI 0x104 ++#define GPU_TILER_PRESENT_LO 0x110 ++#define GPU_TILER_PRESENT_HI 0x114 ++#define GPU_L2_PRESENT_LO 0x120 ++#define GPU_L2_PRESENT_HI 0x124 ++ ++#define SHADER_READY_LO 0x140 ++#define SHADER_READY_HI 0x144 ++#define TILER_READY_LO 0x150 ++#define TILER_READY_HI 0x154 ++#define L2_READY_LO 0x160 ++#define L2_READY_HI 0x164 ++ ++#define SHADER_PWRON_LO 0x180 ++#define SHADER_PWRON_HI 0x184 ++#define TILER_PWRON_LO 0x190 ++#define TILER_PWRON_HI 0x194 ++#define L2_PWRON_LO 0x1A0 ++#define L2_PWRON_HI 0x1A4 ++ ++#define SHADER_PWROFF_LO 0x1C0 ++#define SHADER_PWROFF_HI 0x1C4 ++#define TILER_PWROFF_LO 0x1D0 ++#define TILER_PWROFF_HI 0x1D4 ++#define L2_PWROFF_LO 0x1E0 ++#define L2_PWROFF_HI 0x1E4 ++ ++#define SHADER_PWRTRANS_LO 0x200 ++#define SHADER_PWRTRANS_HI 0x204 ++#define TILER_PWRTRANS_LO 0x210 ++#define TILER_PWRTRANS_HI 0x214 ++#define L2_PWRTRANS_LO 0x220 ++#define L2_PWRTRANS_HI 0x224 ++ ++#define SHADER_PWRACTIVE_LO 0x240 ++#define SHADER_PWRACTIVE_HI 0x244 ++#define TILER_PWRACTIVE_LO 0x250 ++#define TILER_PWRACTIVE_HI 0x254 ++#define L2_PWRACTIVE_LO 0x260 ++#define L2_PWRACTIVE_HI 0x264 ++ ++#define GPU_REVID 0x280 ++ ++#define GPU_COHERENCY_FEATURES 0x300 ++#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) ++ ++#define GPU_COHERENCY_PROTOCOL 0x304 ++#define GPU_COHERENCY_ACE 0 ++#define GPU_COHERENCY_ACE_LITE 1 ++#define GPU_COHERENCY_NONE 31 ++ ++#define MCU_CONTROL 0x700 ++#define MCU_CONTROL_ENABLE 1 ++#define MCU_CONTROL_AUTO 2 ++#define MCU_CONTROL_DISABLE 0 ++ ++#define MCU_STATUS 0x704 ++#define MCU_STATUS_DISABLED 0 ++#define MCU_STATUS_ENABLED 1 ++#define MCU_STATUS_HALT 2 ++#define MCU_STATUS_FATAL 3 ++ ++/* Job Control regs */ ++#define JOB_INT_RAWSTAT 0x1000 ++#define JOB_INT_CLEAR 0x1004 ++#define JOB_INT_MASK 0x1008 ++#define JOB_INT_STAT 0x100c ++#define JOB_INT_GLOBAL_IF BIT(31) ++#define JOB_INT_CSG_IF(x) BIT(x) ++ ++/* MMU regs */ ++#define MMU_INT_RAWSTAT 0x2000 ++#define MMU_INT_CLEAR 0x2004 ++#define MMU_INT_MASK 0x2008 ++#define MMU_INT_STAT 0x200c ++ ++/* AS_COMMAND register commands */ ++ ++#define MMU_BASE 0x2400 ++#define MMU_AS_SHIFT 6 ++#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT)) ++ ++#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0) ++#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4) ++#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8) ++#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) ++#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ ++ ((w) ? BIT(0) : 0) | \ ++ ((r) ? 
BIT(1) : 0)) ++#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) ++#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) ++#define AS_MEMATTR_AARCH64_SHARED (0 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) ++#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) ++#define AS_MEMATTR_AARCH64_FAULT (3 << 6) ++#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) ++#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) ++#define AS_COMMAND(as) (MMU_AS(as) + 0x18) ++#define AS_COMMAND_NOP 0 ++#define AS_COMMAND_UPDATE 1 ++#define AS_COMMAND_LOCK 2 ++#define AS_COMMAND_UNLOCK 3 ++#define AS_COMMAND_FLUSH_PT 4 ++#define AS_COMMAND_FLUSH_MEM 5 ++#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) ++#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) ++#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) ++#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) ++#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) ++#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) ++#define AS_STATUS(as) (MMU_AS(as) + 0x28) ++#define AS_STATUS_AS_ACTIVE BIT(0) ++#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) ++#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) ++#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) ++#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) ++#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) ++#define AS_TRANSCFG_INA_BITS(x) ((x) << 6) ++#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) ++#define AS_TRANSCFG_SL_CONCAT BIT(22) ++#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) ++#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) ++#define AS_TRANSCFG_PTW_SH_NS (0 << 28) ++#define AS_TRANSCFG_PTW_SH_OS (2 << 28) ++#define AS_TRANSCFG_PTW_SH_IS (3 << 28) ++#define AS_TRANSCFG_PTW_RA BIT(30) ++#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) ++#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) ++#define AS_TRANSCFG_WXN BIT(35) ++#define AS_TRANSCFG_XREADABLE BIT(36) ++#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) ++#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) ++ ++#define CSF_GPU_LATEST_FLUSH_ID 0x10000 ++ ++#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) ++#define CSF_GLB_DOORBELL_ID 0 ++ ++#define gpu_write(dev, reg, data) \ ++ writel(data, (dev)->iomem + (reg)) ++ ++#define gpu_read(dev, reg) \ ++ readl((dev)->iomem + (reg)) ++ ++#endif +-- +2.42.0 + + +From 6f4ea11ab631a15ac300e40141560040c8a2ce18 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:17 +0100 +Subject: [PATCH 05/71] [MERGED] drm/panthor: Add the device logical block + +The panthor driver is designed in a modular way, where each logical +block is dealing with a specific HW-block or software feature. In order +for those blocks to communicate with each other, we need a central +panthor_device collecting all the blocks, and exposing some common +features, like interrupt handling, power management, reset, ... + +This what this panthor_device logical block is about. 
+ +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Suspend the MMU/GPU blocks if panthor_fw_resume() fails in + panthor_device_resume() +- Move the pm_runtime_use_autosuspend() call before drm_dev_register() +- Add Liviu's R-b + +v4: +- Check drmm_mutex_init() return code +- Fix panthor_device_reset_work() out path +- Fix the race in the unplug logic +- Fix typos +- Unplug blocks when something fails in panthor_device_init() +- Add Steve's R-b + +v3: +- Add acks for the MIT+GPL2 relicensing +- Fix 32-bit support +- Shorten the sections protected by panthor_device::pm::mmio_lock to fix + lock ordering issues. +- Rename panthor_device::pm::lock into panthor_device::pm::mmio_lock to + better reflect what this lock is protecting +- Use dev_err_probe() +- Make sure we call drm_dev_exit() when something fails half-way in + panthor_device_reset_work() +- Replace CSF_GPU_LATEST_FLUSH_ID_DEFAULT with a constant '1' and a + comment to explain. Also remove setting the dummy flush ID on suspend. +- Remove drm_WARN_ON() in panthor_exception_name() +- Check pirq->suspended in panthor_xxx_irq_raw_handler() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-4-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_device.c | 549 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_device.h | 394 ++++++++++++++++ + 2 files changed, 943 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_device.c + create mode 100644 drivers/gpu/drm/panthor/panthor_device.h + +diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c +new file mode 100644 +index 000000000000..bfe8da4a6e4c +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.c +@@ -0,0 +1,549 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++static int panthor_clk_init(struct panthor_device *ptdev) ++{ ++ ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); ++ if (IS_ERR(ptdev->clks.core)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.core), ++ "get 'core' clock failed"); ++ ++ ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks"); ++ if (IS_ERR(ptdev->clks.stacks)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.stacks), ++ "get 'stacks' clock failed"); ++ ++ ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup"); ++ if (IS_ERR(ptdev->clks.coregroup)) ++ return dev_err_probe(ptdev->base.dev, ++ PTR_ERR(ptdev->clks.coregroup), ++ "get 'coregroup' clock failed"); ++ ++ drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core)); ++ return 0; ++} ++ ++void panthor_device_unplug(struct panthor_device *ptdev) ++{ ++ /* This function can be called from two different path: the reset work ++ * and the platform device remove callback. drm_dev_unplug() doesn't ++ * deal with concurrent callers, so we have to protect drm_dev_unplug() ++ * calls with our own lock, and bail out if the device is already ++ * unplugged. ++ */ ++ mutex_lock(&ptdev->unplug.lock); ++ if (drm_dev_is_unplugged(&ptdev->base)) { ++ /* Someone beat us, release the lock and wait for the unplug ++ * operation to be reported as done. ++ **/ ++ mutex_unlock(&ptdev->unplug.lock); ++ wait_for_completion(&ptdev->unplug.done); ++ return; ++ } ++ ++ /* Call drm_dev_unplug() so any access to HW blocks happening after ++ * that point get rejected. ++ */ ++ drm_dev_unplug(&ptdev->base); ++ ++ /* We do the rest of the unplug with the unplug lock released, ++ * future callers will wait on ptdev->unplug.done anyway. ++ */ ++ mutex_unlock(&ptdev->unplug.lock); ++ ++ drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); ++ ++ /* Now, try to cleanly shutdown the GPU before the device resources ++ * get reclaimed. ++ */ ++ panthor_sched_unplug(ptdev); ++ panthor_fw_unplug(ptdev); ++ panthor_mmu_unplug(ptdev); ++ panthor_gpu_unplug(ptdev); ++ ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ ++ /* Report the unplug operation as done to unblock concurrent ++ * panthor_device_unplug() callers. 
++ */ ++ complete_all(&ptdev->unplug.done); ++} ++ ++static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ ++ cancel_work_sync(&ptdev->reset.work); ++ destroy_workqueue(ptdev->reset.wq); ++} ++ ++static void panthor_device_reset_work(struct work_struct *work) ++{ ++ struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); ++ int ret = 0, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { ++ /* ++ * No need for a reset as the device has been (or will be) ++ * powered down ++ */ ++ atomic_set(&ptdev->reset.pending, 0); ++ return; ++ } ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ panthor_sched_pre_reset(ptdev); ++ panthor_fw_pre_reset(ptdev, true); ++ panthor_mmu_pre_reset(ptdev); ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_l2_power_on(ptdev); ++ panthor_mmu_post_reset(ptdev); ++ ret = panthor_fw_post_reset(ptdev); ++ if (ret) ++ goto out_dev_exit; ++ ++ atomic_set(&ptdev->reset.pending, 0); ++ panthor_sched_post_reset(ptdev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ ++ if (ret) { ++ panthor_device_unplug(ptdev); ++ drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable."); ++ } ++} ++ ++static bool panthor_device_is_initialized(struct panthor_device *ptdev) ++{ ++ return !!ptdev->scheduler; ++} ++ ++static void panthor_device_free_page(struct drm_device *ddev, void *data) ++{ ++ free_page((unsigned long)data); ++} ++ ++int panthor_device_init(struct panthor_device *ptdev) ++{ ++ struct resource *res; ++ struct page *p; ++ int ret; ++ ++ ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; ++ ++ init_completion(&ptdev->unplug.done); ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); ++ if (ret) ++ return ret; ++ ++ ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock); ++ if (ret) ++ return ret; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ p = alloc_page(GFP_KERNEL | __GFP_ZERO); ++ if (!p) ++ return -ENOMEM; ++ ++ ptdev->pm.dummy_latest_flush = page_address(p); ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, ++ ptdev->pm.dummy_latest_flush); ++ if (ret) ++ return ret; ++ ++ /* ++ * Set the dummy page holding the latest flush to 1. This will cause the ++ * flush to avoided as we know it isn't necessary if the submission ++ * happens while the dummy page is mapped. Zero cannot be used because ++ * that means 'always flush'. 
++ */ ++ *ptdev->pm.dummy_latest_flush = 1; ++ ++ INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); ++ ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); ++ if (!ptdev->reset.wq) ++ return -ENOMEM; ++ ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL); ++ if (ret) ++ return ret; ++ ++ ret = panthor_clk_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_devfreq_init(ptdev); ++ if (ret) ++ return ret; ++ ++ ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev), ++ 0, &res); ++ if (IS_ERR(ptdev->iomem)) ++ return PTR_ERR(ptdev->iomem); ++ ++ ptdev->phys_addr = res->start; ++ ++ ret = devm_pm_runtime_enable(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (ret) ++ return ret; ++ ++ ret = panthor_gpu_init(ptdev); ++ if (ret) ++ goto err_rpm_put; ++ ++ ret = panthor_mmu_init(ptdev); ++ if (ret) ++ goto err_unplug_gpu; ++ ++ ret = panthor_fw_init(ptdev); ++ if (ret) ++ goto err_unplug_mmu; ++ ++ ret = panthor_sched_init(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ /* ~3 frames */ ++ pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); ++ pm_runtime_use_autosuspend(ptdev->base.dev); ++ ++ ret = drm_dev_register(&ptdev->base, 0); ++ if (ret) ++ goto err_disable_autosuspend; ++ ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ return 0; ++ ++err_disable_autosuspend: ++ pm_runtime_dont_use_autosuspend(ptdev->base.dev); ++ panthor_sched_unplug(ptdev); ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ ++err_unplug_mmu: ++ panthor_mmu_unplug(ptdev); ++ ++err_unplug_gpu: ++ panthor_gpu_unplug(ptdev); ++ ++err_rpm_put: ++ pm_runtime_put_sync_suspend(ptdev->base.dev); ++ return ret; ++} ++ ++#define PANTHOR_EXCEPTION(id) \ ++ [DRM_PANTHOR_EXCEPTION_ ## id] = { \ ++ .name = #id, \ ++ } ++ ++struct panthor_exception_info { ++ const char *name; ++}; ++ ++static const struct panthor_exception_info panthor_exception_infos[] = { ++ PANTHOR_EXCEPTION(OK), ++ PANTHOR_EXCEPTION(TERMINATED), ++ PANTHOR_EXCEPTION(KABOOM), ++ PANTHOR_EXCEPTION(EUREKA), ++ PANTHOR_EXCEPTION(ACTIVE), ++ PANTHOR_EXCEPTION(CS_RES_TERM), ++ PANTHOR_EXCEPTION(CS_CONFIG_FAULT), ++ PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), ++ PANTHOR_EXCEPTION(CS_BUS_FAULT), ++ PANTHOR_EXCEPTION(CS_INSTR_INVALID), ++ PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW), ++ PANTHOR_EXCEPTION(CS_INHERIT_FAULT), ++ PANTHOR_EXCEPTION(INSTR_INVALID_PC), ++ PANTHOR_EXCEPTION(INSTR_INVALID_ENC), ++ PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT), ++ PANTHOR_EXCEPTION(DATA_INVALID_FAULT), ++ PANTHOR_EXCEPTION(TILE_RANGE_FAULT), ++ PANTHOR_EXCEPTION(ADDR_RANGE_FAULT), ++ PANTHOR_EXCEPTION(IMPRECISE_FAULT), ++ PANTHOR_EXCEPTION(OOM), ++ PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR), ++ PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT), ++ PANTHOR_EXCEPTION(GPU_BUS_FAULT), ++ PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT), ++ PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_0), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_1), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_2), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_3), ++ PANTHOR_EXCEPTION(TRANSLATION_FAULT_4), ++ PANTHOR_EXCEPTION(PERM_FAULT_0), ++ PANTHOR_EXCEPTION(PERM_FAULT_1), ++ PANTHOR_EXCEPTION(PERM_FAULT_2), ++ PANTHOR_EXCEPTION(PERM_FAULT_3), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_1), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_2), ++ PANTHOR_EXCEPTION(ACCESS_FLAG_3), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN), ++ 
PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2), ++ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2), ++ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3), ++}; ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code) ++{ ++ if (exception_code >= ARRAY_SIZE(panthor_exception_infos) || ++ !panthor_exception_infos[exception_code].name) ++ return "Unknown exception type"; ++ ++ return panthor_exception_infos[exception_code].name; ++} ++ ++static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf) ++{ ++ struct vm_area_struct *vma = vmf->vma; ++ struct panthor_device *ptdev = vma->vm_private_data; ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ unsigned long pfn; ++ pgprot_t pgprot; ++ vm_fault_t ret; ++ bool active; ++ int cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return VM_FAULT_SIGBUS; ++ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (active) ++ pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); ++ else ++ pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); ++ break; ++ ++ default: ++ ret = VM_FAULT_SIGBUS; ++ goto out_unlock; ++ } ++ ++ pgprot = vma->vm_page_prot; ++ if (active) ++ pgprot = pgprot_noncached(pgprot); ++ ++ ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot); ++ ++out_unlock: ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct vm_operations_struct panthor_mmio_vm_ops = { ++ .fault = panthor_mmio_vm_fault, ++}; ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) ++{ ++ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ ++ switch (panthor_device_mmio_offset(id)) { ++ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: ++ if (vma->vm_end - vma->vm_start != PAGE_SIZE || ++ (vma->vm_flags & (VM_WRITE | VM_EXEC))) ++ return -EINVAL; ++ ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ /* Defer actual mapping to the fault handler. 
*/ ++ vma->vm_private_data = ptdev; ++ vma->vm_ops = &panthor_mmio_vm_ops; ++ vm_flags_set(vma, ++ VM_IO | VM_DONTCOPY | VM_DONTEXPAND | ++ VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP); ++ return 0; ++} ++ ++#ifdef CONFIG_PM ++int panthor_device_resume(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED) ++ return -EINVAL; ++ ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING); ++ ++ ret = clk_prepare_enable(ptdev->clks.core); ++ if (ret) ++ goto err_set_suspended; ++ ++ ret = clk_prepare_enable(ptdev->clks.stacks); ++ if (ret) ++ goto err_disable_core_clk; ++ ++ ret = clk_prepare_enable(ptdev->clks.coregroup); ++ if (ret) ++ goto err_disable_stacks_clk; ++ ++ ret = panthor_devfreq_resume(ptdev); ++ if (ret) ++ goto err_disable_coregroup_clk; ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ if (!ret) { ++ panthor_sched_resume(ptdev); ++ } else { ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ } ++ ++ drm_dev_exit(cookie); ++ ++ if (ret) ++ goto err_suspend_devfreq; ++ } ++ ++ if (atomic_read(&ptdev->reset.pending)) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++ ++ /* Clear all IOMEM mappings pointing to this device after we've ++ * resumed. This way the fake mappings pointing to the dummy pages ++ * are removed and the real iomem mapping will be restored on next ++ * access. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return 0; ++ ++err_suspend_devfreq: ++ panthor_devfreq_suspend(ptdev); ++ ++err_disable_coregroup_clk: ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ ++err_disable_stacks_clk: ++ clk_disable_unprepare(ptdev->clks.stacks); ++ ++err_disable_core_clk: ++ clk_disable_unprepare(ptdev->clks.core); ++ ++err_set_suspended: ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return ret; ++} ++ ++int panthor_device_suspend(struct device *dev) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ int ret, cookie; ++ ++ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ return -EINVAL; ++ ++ /* Clear all IOMEM mappings pointing to this device before we ++ * shutdown the power-domain and clocks. Failing to do that results ++ * in external aborts when the process accesses the iomem region. ++ * We change the state and call unmap_mapping_range() with the ++ * mmio_lock held to make sure the vm_fault handler won't set up ++ * invalid mappings. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ cancel_work_sync(&ptdev->reset.work); ++ ++ /* We prepare everything as if we were resetting the GPU. ++ * The end of the reset will happen in the resume path though. 
++ */ ++ panthor_sched_suspend(ptdev); ++ panthor_fw_suspend(ptdev); ++ panthor_mmu_suspend(ptdev); ++ panthor_gpu_suspend(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ ret = panthor_devfreq_suspend(ptdev); ++ if (ret) { ++ if (panthor_device_is_initialized(ptdev) && ++ drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_gpu_resume(ptdev); ++ panthor_mmu_resume(ptdev); ++ drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); ++ panthor_sched_resume(ptdev); ++ drm_dev_exit(cookie); ++ } ++ ++ goto err_set_active; ++ } ++ ++ clk_disable_unprepare(ptdev->clks.coregroup); ++ clk_disable_unprepare(ptdev->clks.stacks); ++ clk_disable_unprepare(ptdev->clks.core); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); ++ return 0; ++ ++err_set_active: ++ /* If something failed and we have to revert back to an ++ * active state, we also need to clear the MMIO userspace ++ * mappings, so any dumb pages that were mapped while we ++ * were trying to suspend gets invalidated. ++ */ ++ mutex_lock(&ptdev->pm.mmio_lock); ++ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); ++ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, ++ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); ++ mutex_unlock(&ptdev->pm.mmio_lock); ++ return ret; ++} ++#endif +diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h +new file mode 100644 +index 000000000000..51c9d61b6796 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_device.h +@@ -0,0 +1,394 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVICE_H__ ++#define __PANTHOR_DEVICE_H__ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++struct panthor_csf; ++struct panthor_csf_ctx; ++struct panthor_device; ++struct panthor_gpu; ++struct panthor_group_pool; ++struct panthor_heap_pool; ++struct panthor_job; ++struct panthor_mmu; ++struct panthor_fw; ++struct panthor_perfcnt; ++struct panthor_vm; ++struct panthor_vm_pool; ++ ++/** ++ * enum panthor_device_pm_state - PM state ++ */ ++enum panthor_device_pm_state { ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ ++ PANTHOR_DEVICE_PM_STATE_RESUMING, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ ++ PANTHOR_DEVICE_PM_STATE_ACTIVE, ++ ++ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ ++ PANTHOR_DEVICE_PM_STATE_SUSPENDING, ++}; ++ ++/** ++ * struct panthor_irq - IRQ data ++ * ++ * Used to automate IRQ handling for the 3 different IRQs we have in this driver. ++ */ ++struct panthor_irq { ++ /** @ptdev: Panthor device */ ++ struct panthor_device *ptdev; ++ ++ /** @irq: IRQ number. */ ++ int irq; ++ ++ /** @mask: Current mask being applied to xxx_INT_MASK. */ ++ u32 mask; ++ ++ /** @suspended: Set to true when the IRQ is suspended. */ ++ atomic_t suspended; ++}; ++ ++/** ++ * struct panthor_device - Panthor device ++ */ ++struct panthor_device { ++ /** @base: Base drm_device. */ ++ struct drm_device base; ++ ++ /** @phys_addr: Physical address of the iomem region. */ ++ phys_addr_t phys_addr; ++ ++ /** @iomem: CPU mapping of the IOMEM region. */ ++ void __iomem *iomem; ++ ++ /** @clks: GPU clocks. */ ++ struct { ++ /** @core: Core clock. */ ++ struct clk *core; ++ ++ /** @stacks: Stacks clock. 
This clock is optional. */ ++ struct clk *stacks; ++ ++ /** @coregroup: Core group clock. This clock is optional. */ ++ struct clk *coregroup; ++ } clks; ++ ++ /** @coherent: True if the CPU/GPU are memory coherent. */ ++ bool coherent; ++ ++ /** @gpu_info: GPU information. */ ++ struct drm_panthor_gpu_info gpu_info; ++ ++ /** @csif_info: Command stream interface information. */ ++ struct drm_panthor_csif_info csif_info; ++ ++ /** @gpu: GPU management data. */ ++ struct panthor_gpu *gpu; ++ ++ /** @fw: FW management data. */ ++ struct panthor_fw *fw; ++ ++ /** @mmu: MMU management data. */ ++ struct panthor_mmu *mmu; ++ ++ /** @scheduler: Scheduler management data. */ ++ struct panthor_scheduler *scheduler; ++ ++ /** @devfreq: Device frequency scaling management data. */ ++ struct panthor_devfreq *devfreq; ++ ++ /** @unplug: Device unplug related fields. */ ++ struct { ++ /** @lock: Lock used to serialize unplug operations. */ ++ struct mutex lock; ++ ++ /** ++ * @done: Completion object signaled when the unplug ++ * operation is done. ++ */ ++ struct completion done; ++ } unplug; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @wq: Ordered worqueud used to schedule reset operations. */ ++ struct workqueue_struct *wq; ++ ++ /** @work: Reset work. */ ++ struct work_struct work; ++ ++ /** @pending: Set to true if a reset is pending. */ ++ atomic_t pending; ++ } reset; ++ ++ /** @pm: Power management related data. */ ++ struct { ++ /** @state: Power state. */ ++ atomic_t state; ++ ++ /** ++ * @mmio_lock: Lock protecting MMIO userspace CPU mappings. ++ * ++ * This is needed to ensure we map the dummy IO pages when ++ * the device is being suspended, and the real IO pages when ++ * the device is being resumed. We can't just do with the ++ * state atomicity to deal with this race. ++ */ ++ struct mutex mmio_lock; ++ ++ /** ++ * @dummy_latest_flush: Dummy LATEST_FLUSH page. ++ * ++ * Used to replace the real LATEST_FLUSH page when the GPU ++ * is suspended. ++ */ ++ u32 *dummy_latest_flush; ++ } pm; ++}; ++ ++/** ++ * struct panthor_file - Panthor file ++ */ ++struct panthor_file { ++ /** @ptdev: Device attached to this file. */ ++ struct panthor_device *ptdev; ++ ++ /** @vms: VM pool attached to this file. */ ++ struct panthor_vm_pool *vms; ++ ++ /** @groups: Scheduling group pool attached to this file. */ ++ struct panthor_group_pool *groups; ++}; ++ ++int panthor_device_init(struct panthor_device *ptdev); ++void panthor_device_unplug(struct panthor_device *ptdev); ++ ++/** ++ * panthor_device_schedule_reset() - Schedules a reset operation ++ */ ++static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) ++{ ++ if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && ++ atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) ++ queue_work(ptdev->reset.wq, &ptdev->reset.work); ++} ++ ++/** ++ * panthor_device_reset_is_pending() - Checks if a reset is pending. ++ * ++ * Return: true if a reset is pending, false otherwise. 
++ */ ++static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) ++{ ++ return atomic_read(&ptdev->reset.pending) != 0; ++} ++ ++int panthor_device_mmap_io(struct panthor_device *ptdev, ++ struct vm_area_struct *vma); ++ ++int panthor_device_resume(struct device *dev); ++int panthor_device_suspend(struct device *dev); ++ ++enum drm_panthor_exception_type { ++ DRM_PANTHOR_EXCEPTION_OK = 0x00, ++ DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, ++ DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, ++ DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, ++ DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, ++ DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, ++ DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, ++ DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, ++ DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, ++ DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, ++ DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, ++ DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, ++ DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, ++ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, ++ DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, ++ DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, ++ DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, ++ DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, ++ DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, ++ DRM_PANTHOR_EXCEPTION_OOM = 0x60, ++ DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, ++ DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, ++ DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, ++ DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, ++ DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, ++ DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, ++ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, ++ DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, ++ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, ++ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, ++ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, ++}; ++ ++/** ++ * panthor_exception_is_fault() - Checks if an exception is a fault. ++ * ++ * Return: true if the exception is a fault, false otherwise. ++ */ ++static inline bool ++panthor_exception_is_fault(u32 exception_code) ++{ ++ return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; ++} ++ ++const char *panthor_exception_name(struct panthor_device *ptdev, ++ u32 exception_code); ++ ++/** ++ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt ++ * registration function. ++ * ++ * The boiler-plate to gracefully deal with shared interrupts is ++ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() ++ * just after the actual handler. 
The handler prototype is: ++ * ++ * void (*handler)(struct panthor_device *, u32 status); ++ */ ++#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \ ++static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ \ ++ if (atomic_read(&pirq->suspended)) \ ++ return IRQ_NONE; \ ++ if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \ ++ return IRQ_NONE; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ return IRQ_WAKE_THREAD; \ ++} \ ++ \ ++static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ ++{ \ ++ struct panthor_irq *pirq = data; \ ++ struct panthor_device *ptdev = pirq->ptdev; \ ++ irqreturn_t ret = IRQ_NONE; \ ++ \ ++ while (true) { \ ++ u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \ ++ \ ++ if (!status) \ ++ break; \ ++ \ ++ gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ ++ \ ++ __handler(ptdev, status); \ ++ ret = IRQ_HANDLED; \ ++ } \ ++ \ ++ if (!atomic_read(&pirq->suspended)) \ ++ gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \ ++ \ ++ return ret; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, true); \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ ++ synchronize_irq(pirq->irq); \ ++ drm_dev_exit(cookie); \ ++ } \ ++ \ ++ pirq->mask = 0; \ ++} \ ++ \ ++static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ ++{ \ ++ int cookie; \ ++ \ ++ atomic_set(&pirq->suspended, false); \ ++ pirq->mask = mask; \ ++ \ ++ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ ++ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ ++ drm_dev_exit(cookie); \ ++ } \ ++} \ ++ \ ++static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ ++ struct panthor_irq *pirq, \ ++ int irq, u32 mask) \ ++{ \ ++ pirq->ptdev = ptdev; \ ++ pirq->irq = irq; \ ++ panthor_ ## __name ## _irq_resume(pirq, mask); \ ++ \ ++ return devm_request_threaded_irq(ptdev->base.dev, irq, \ ++ panthor_ ## __name ## _irq_raw_handler, \ ++ panthor_ ## __name ## _irq_threaded_handler, \ ++ IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ ++ pirq); \ ++} ++ ++/** ++ * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one ++ * @offset: Offset to convert. ++ * ++ * With 32-bit systems being limited by the 32-bit representation of mmap2's ++ * pgoffset field, we need to make the MMIO offset arch specific. This function ++ * converts a user MMIO offset into something the kernel driver understands. ++ * ++ * If the kernel and userspace architecture match, the offset is unchanged. If ++ * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match ++ * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. ++ * ++ * Return: Adjusted offset. 
++ */ ++static inline u64 panthor_device_mmio_offset(u64 offset) ++{ ++#ifdef CONFIG_ARM64 ++ if (test_tsk_thread_flag(current, TIF_32BIT)) ++ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; ++#endif ++ ++ return offset; ++} ++ ++extern struct workqueue_struct *panthor_cleanup_wq; ++ ++#endif +-- +2.42.0 + + +From 0368bf10445d1f9d1f409a733aa6b10bd255bdfa Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:18 +0100 +Subject: [PATCH 06/71] [MERGED] drm/panthor: Add the GPU logical block + +Handles everything that's not related to the FW, the MMU or the +scheduler. This is the block dealing with the GPU property retrieval, +the GPU block power on/off logic, and some global operations, like +global cache flushing. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix GPU_MODEL() kernel doc +- Fix test in panthor_gpu_block_power_off() +- Add Steve's R-b + +v4: +- Expose CORE_FEATURES through DEV_QUERY + +v3: +- Add acks for the MIT/GPL2 relicensing +- Use macros to extract GPU ID info +- Make sure we reset clear pending_reqs bits when wait_event_timeout() + times out but the corresponding bit is cleared in GPU_INT_RAWSTAT + (can happen if the IRQ is masked or HW takes to long to call the IRQ + handler) +- GPU_MODEL now takes separate arch and product majors to be more + readable. +- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask. +- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are + not updated for protected interrupts). +- Minor code tidy ups + +Cc: Alexey Sheplyakov # MIT+GPL2 relicensing +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-5-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gpu.h | 52 +++ + 2 files changed, 534 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.h + +diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c +new file mode 100644 +index 000000000000..6dbbc4cfbe7e +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.c +@@ -0,0 +1,482 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gpu.h" ++#include "panthor_regs.h" ++ ++/** ++ * struct panthor_gpu - GPU block management data. ++ */ ++struct panthor_gpu { ++ /** @irq: GPU irq. */ ++ struct panthor_irq irq; ++ ++ /** @reqs_lock: Lock protecting access to pending_reqs. */ ++ spinlock_t reqs_lock; ++ ++ /** @pending_reqs: Pending GPU requests. */ ++ u32 pending_reqs; ++ ++ /** @reqs_acked: GPU request wait queue. */ ++ wait_queue_head_t reqs_acked; ++}; ++ ++/** ++ * struct panthor_model - GPU model description ++ */ ++struct panthor_model { ++ /** @name: Model name. */ ++ const char *name; ++ ++ /** @arch_major: Major version number of architecture. 
*/ ++ u8 arch_major; ++ ++ /** @product_major: Major version number of product. */ ++ u8 product_major; ++}; ++ ++/** ++ * GPU_MODEL() - Define a GPU model. A GPU product can be uniquely identified ++ * by a combination of the major architecture version and the major product ++ * version. ++ * @_name: Name for the GPU model. ++ * @_arch_major: Architecture major. ++ * @_product_major: Product major. ++ */ ++#define GPU_MODEL(_name, _arch_major, _product_major) \ ++{\ ++ .name = __stringify(_name), \ ++ .arch_major = _arch_major, \ ++ .product_major = _product_major, \ ++} ++ ++static const struct panthor_model gpu_models[] = { ++ GPU_MODEL(g610, 10, 7), ++ {}, ++}; ++ ++#define GPU_INTERRUPTS_MASK \ ++ (GPU_IRQ_FAULT | \ ++ GPU_IRQ_PROTM_FAULT | \ ++ GPU_IRQ_RESET_COMPLETED | \ ++ GPU_IRQ_CLEAN_CACHES_COMPLETED) ++ ++static void panthor_gpu_init_info(struct panthor_device *ptdev) ++{ ++ const struct panthor_model *model; ++ u32 arch_major, product_major; ++ u32 major, minor, status; ++ unsigned int i; ++ ++ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); ++ ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); ++ ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); ++ ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); ++ ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); ++ ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); ++ ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); ++ ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); ++ ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); ++ ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); ++ ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); ++ ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); ++ ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); ++ for (i = 0; i < 4; i++) ++ ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); ++ ++ ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); ++ ++ ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO); ++ ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO); ++ ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32; ++ ++ ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO); ++ ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32; ++ ++ arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); ++ product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); ++ major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); ++ minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); ++ status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); ++ ++ for (model = gpu_models; model->name; model++) { ++ if (model->arch_major == arch_major && ++ model->product_major == product_major) ++ break; ++ } ++ ++ drm_info(&ptdev->base, ++ "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", ++ model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, ++ major, minor, status); ++ ++ drm_info(&ptdev->base, ++ "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", ++ ptdev->gpu_info.l2_features, ++ ptdev->gpu_info.tiler_features, ++ ptdev->gpu_info.mem_features, ++ ptdev->gpu_info.mmu_features, ++ ptdev->gpu_info.as_present); ++ ++ drm_info(&ptdev->base, ++ "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", 
++ ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, ++ ptdev->gpu_info.tiler_present); ++} ++ ++static void panthor_gpu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (status & GPU_IRQ_FAULT) { ++ u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); ++ u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | ++ gpu_read(ptdev, GPU_FAULT_ADDR_LO); ++ ++ drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n", ++ fault_status, panthor_exception_name(ptdev, fault_status & 0xFF), ++ address); ++ } ++ if (status & GPU_IRQ_PROTM_FAULT) ++ drm_warn(&ptdev->base, "GPU Fault in protected mode\n"); ++ ++ spin_lock(&ptdev->gpu->reqs_lock); ++ if (status & ptdev->gpu->pending_reqs) { ++ ptdev->gpu->pending_reqs &= ~status; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ } ++ spin_unlock(&ptdev->gpu->reqs_lock); ++} ++PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler); ++ ++/** ++ * panthor_gpu_unplug() - Called when the GPU is unplugged. ++ * @ptdev: Device to unplug. ++ */ ++void panthor_gpu_unplug(struct panthor_device *ptdev) ++{ ++ unsigned long flags; ++ ++ /* Make sure the IRQ handler is not running after that point. */ ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++ ++ /* Wake-up all waiters. */ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ ptdev->gpu->pending_reqs = 0; ++ wake_up_all(&ptdev->gpu->reqs_acked); ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++} ++ ++/** ++ * panthor_gpu_init() - Initialize the GPU block ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_init(struct panthor_device *ptdev) ++{ ++ struct panthor_gpu *gpu; ++ u32 pa_bits; ++ int ret, irq; ++ ++ gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL); ++ if (!gpu) ++ return -ENOMEM; ++ ++ spin_lock_init(&gpu->reqs_lock); ++ init_waitqueue_head(&gpu->reqs_acked); ++ ptdev->gpu = gpu; ++ panthor_gpu_init_info(ptdev); ++ ++ dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); ++ pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits)); ++ if (ret) ++ return ret; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); ++ if (irq <= 0) ++ return ret; ++ ++ ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); ++ if (ret) ++ return ret; ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwroff_reg: Power-off register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @mask: Sub-elements to power-off. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwroff_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwroff_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU ++ * @ptdev: Device. ++ * @blk_name: Block name. ++ * @pwron_reg: Power-on register for this block. ++ * @pwrtrans_reg: Power transition register for this block. ++ * @rdy_reg: Power transition ready register. ++ * @mask: Sub-elements to power-on. ++ * @timeout_us: Timeout in microseconds. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us) ++{ ++ u32 val, i; ++ int ret; ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), ++ val, !(mask32 & val), ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ if (mask & GENMASK(31, 0)) ++ gpu_write(ptdev, pwron_reg, mask); ++ ++ if (mask >> 32) ++ gpu_write(ptdev, pwron_reg + 4, mask >> 32); ++ ++ for (i = 0; i < 2; i++) { ++ u32 mask32 = mask >> (i * 32); ++ ++ if (!mask32) ++ continue; ++ ++ ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4), ++ val, (mask32 & val) == mask32, ++ 100, timeout_us); ++ if (ret) { ++ drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", ++ blk_name, mask); ++ return ret; ++ } ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_l2_power_on() - Power-on the L2-cache ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev) ++{ ++ if (ptdev->gpu_info.l2_present != 1) { ++ /* ++ * Only support one core group now. ++ * ~(l2_present - 1) unsets all bits in l2_present except ++ * the bottom bit. (l2_present - 2) has all the bits in ++ * the first core group set. AND them together to generate ++ * a mask of cores in the first core group. ++ */ ++ u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) & ++ (ptdev->gpu_info.l2_present - 2); ++ drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n", ++ hweight64(core_mask), ++ hweight64(ptdev->gpu_info.shader_present)); ++ } ++ ++ return panthor_gpu_power_on(ptdev, L2, 1, 20000); ++} ++ ++/** ++ * panthor_gpu_flush_caches() - Flush caches ++ * @ptdev: Device. ++ * @l2: L2 flush type. ++ * @lsc: LSC flush type. 
++ * @other: Other flush type. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Flush caches timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_soft_reset() - Issue a soft-reset ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_gpu_soft_reset(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if (!drm_WARN_ON(&ptdev->base, ++ ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) { ++ ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED; ++ gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); ++ gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET); ++ } ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ ++ if (!wait_event_timeout(ptdev->gpu->reqs_acked, ++ !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED), ++ msecs_to_jiffies(100))) { ++ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); ++ if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 && ++ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED)) ++ timedout = true; ++ else ++ ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED; ++ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); ++ } ++ ++ if (timedout) { ++ drm_err(&ptdev->base, "Soft reset timeout"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_gpu_suspend() - Suspend the GPU block. ++ * @ptdev: Device. ++ * ++ * Suspend the GPU irq. This should be called last in the suspend procedure, ++ * after all other blocks have been suspented. ++ */ ++void panthor_gpu_suspend(struct panthor_device *ptdev) ++{ ++ /* ++ * It may be preferable to simply power down the L2, but for now just ++ * soft-reset which will leave the L2 powered down. ++ */ ++ panthor_gpu_soft_reset(ptdev); ++ panthor_gpu_irq_suspend(&ptdev->gpu->irq); ++} ++ ++/** ++ * panthor_gpu_resume() - Resume the GPU block. ++ * @ptdev: Device. ++ * ++ * Resume the IRQ handler and power-on the L2-cache. ++ * The FW takes care of powering the other blocks. 
++ */ ++void panthor_gpu_resume(struct panthor_device *ptdev) ++{ ++ panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); ++ panthor_gpu_l2_power_on(ptdev); ++} +diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h +new file mode 100644 +index 000000000000..bba7555dd3c6 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gpu.h +@@ -0,0 +1,52 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#ifndef __PANTHOR_GPU_H__ ++#define __PANTHOR_GPU_H__ ++ ++struct panthor_device; ++ ++int panthor_gpu_init(struct panthor_device *ptdev); ++void panthor_gpu_unplug(struct panthor_device *ptdev); ++void panthor_gpu_suspend(struct panthor_device *ptdev); ++void panthor_gpu_resume(struct panthor_device *ptdev); ++ ++int panthor_gpu_block_power_on(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwron_reg, u32 pwrtrans_reg, ++ u32 rdy_reg, u64 mask, u32 timeout_us); ++int panthor_gpu_block_power_off(struct panthor_device *ptdev, ++ const char *blk_name, ++ u32 pwroff_reg, u32 pwrtrans_reg, ++ u64 mask, u32 timeout_us); ++ ++/** ++ * panthor_gpu_power_on() - Power on the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_on(ptdev, #type, \ ++ type ## _PWRON_LO, \ ++ type ## _PWRTRANS_LO, \ ++ type ## _READY_LO, \ ++ mask, timeout_us) ++ ++/** ++ * panthor_gpu_power_off() - Power off the GPU block. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \ ++ panthor_gpu_block_power_off(ptdev, #type, \ ++ type ## _PWROFF_LO, \ ++ type ## _PWRTRANS_LO, \ ++ mask, timeout_us) ++ ++int panthor_gpu_l2_power_on(struct panthor_device *ptdev); ++int panthor_gpu_flush_caches(struct panthor_device *ptdev, ++ u32 l2, u32 lsc, u32 other); ++int panthor_gpu_soft_reset(struct panthor_device *ptdev); ++ ++#endif +-- +2.42.0 + + +From 6ce04353f5bdf797ef42ae67f7c9c510075aaa12 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:19 +0100 +Subject: [PATCH 07/71] [MERGED] drm/panthor: Add GEM logical block + +Anything relating to GEM object management is placed here. Nothing +particularly interesting here, given the implementation is based on +drm_gem_shmem_object, which is doing most of the work. 
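+
+As a rough illustration of what "drm_gem_shmem_object is doing most of the
+work" means in practice, a shmem-based driver only embeds the helper object
+and provides a container_of() accessor; this is a minimal sketch, not taken
+from the patch, and the example_* names are hypothetical:
+
+  #include <linux/container_of.h>
+  #include <linux/types.h>
+  #include <drm/drm_gem_shmem_helper.h>
+
+  /* Driver-specific GEM object: the shmem helper object is embedded first. */
+  struct example_gem_object {
+          struct drm_gem_shmem_object base;
+          u32 flags;                      /* driver-private state */
+  };
+
+  static inline struct example_gem_object *
+  to_example_bo(struct drm_gem_object *obj)
+  {
+          /* drm_gem_object -> drm_gem_shmem_object -> driver object */
+          return container_of(to_drm_gem_shmem_obj(obj),
+                              struct example_gem_object, base);
+  }
+
+Pinning, vmap/vunmap, mmap and sg-table handling are then inherited from the
+shmem helpers, so the driver code below is mostly bookkeeping on top of them.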
+ +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace when creating a BO +- Keep header inclusion alphabetically ordered + +v5: +- Add Liviu's and Steve's R-b + +v4: +- Force kernel BOs to be GPU mapped +- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers + to simplify the call sites + +v3: +- Add acks for the MIT/GPL2 relicensing +- Provide a panthor_kernel_bo abstraction for buffer objects managed by + the kernel (will replace panthor_fw_mem and be used everywhere we were + using panthor_gem_create_and_map() before) +- Adjust things to match drm_gpuvm changes +- Change return of panthor_gem_create_with_handle() to int + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Liviu Dudau +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-6-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_gem.c | 230 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_gem.h | 142 ++++++++++++++++ + 2 files changed, 372 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.c + create mode 100644 drivers/gpu/drm/panthor/panthor_gem.h + +diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c +new file mode 100644 +index 000000000000..d6483266d0c2 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.c +@@ -0,0 +1,230 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_mmu.h" ++ ++static void panthor_gem_free_object(struct drm_gem_object *obj) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; ++ ++ drm_gem_free_mmap_offset(&bo->base.base); ++ mutex_destroy(&bo->gpuva_list_lock); ++ drm_gem_shmem_free(&bo->base); ++ drm_gem_object_put(vm_root_gem); ++} ++ ++/** ++ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object ++ * @vm: The VM this BO was mapped to. ++ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction ++ * is skipped. ++ */ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo) ++{ ++ int ret; ++ ++ if (IS_ERR_OR_NULL(bo)) ++ return; ++ ++ panthor_kernel_bo_vunmap(bo); ++ ++ if (drm_WARN_ON(bo->obj->dev, ++ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) ++ goto out_free_bo; ++ ++ ret = panthor_vm_unmap_range(vm, bo->va_node.start, ++ panthor_kernel_bo_size(bo)); ++ if (ret) ++ goto out_free_bo; ++ ++ panthor_vm_free_va(vm, &bo->va_node); ++ drm_gem_object_put(bo->obj); ++ ++out_free_bo: ++ kfree(bo); ++} ++ ++/** ++ * panthor_kernel_bo_create() - Create and map a GEM object to a VM ++ * @ptdev: Device. ++ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. ++ * @size: Size of the buffer object. ++ * @bo_flags: Combination of drm_panthor_bo_flags flags. ++ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those ++ * that are related to map operations). ++ * @gpu_va: GPU address assigned when mapping to the VM. 
++ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be ++ * automatically allocated. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va) ++{ ++ struct drm_gem_shmem_object *obj; ++ struct panthor_kernel_bo *kbo; ++ struct panthor_gem_object *bo; ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, !vm)) ++ return ERR_PTR(-EINVAL); ++ ++ kbo = kzalloc(sizeof(*kbo), GFP_KERNEL); ++ if (!kbo) ++ return ERR_PTR(-ENOMEM); ++ ++ obj = drm_gem_shmem_create(&ptdev->base, size); ++ if (IS_ERR(obj)) { ++ ret = PTR_ERR(obj); ++ goto err_free_bo; ++ } ++ ++ bo = to_panthor_bo(&obj->base); ++ size = obj->base.size; ++ kbo->obj = &obj->base; ++ bo->flags = bo_flags; ++ ++ ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); ++ if (ret) ++ goto err_put_obj; ++ ++ ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags); ++ if (ret) ++ goto err_free_va; ++ ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ return kbo; ++ ++err_free_va: ++ panthor_vm_free_va(vm, &kbo->va_node); ++ ++err_put_obj: ++ drm_gem_object_put(&obj->base); ++ ++err_free_bo: ++ kfree(kbo); ++ return ERR_PTR(ret); ++} ++ ++static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(obj); ++ ++ /* Don't allow mmap on objects that have the NO_MMAP flag set. */ ++ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) ++ return -EINVAL; ++ ++ return drm_gem_shmem_object_mmap(obj, vma); ++} ++ ++static struct dma_buf * ++panthor_gem_prime_export(struct drm_gem_object *obj, int flags) ++{ ++ /* We can't export GEMs that have an exclusive VM. */ ++ if (to_panthor_bo(obj)->exclusive_vm_root_gem) ++ return ERR_PTR(-EINVAL); ++ ++ return drm_gem_prime_export(obj, flags); ++} ++ ++static const struct drm_gem_object_funcs panthor_gem_funcs = { ++ .free = panthor_gem_free_object, ++ .print_info = drm_gem_shmem_object_print_info, ++ .pin = drm_gem_shmem_object_pin, ++ .unpin = drm_gem_shmem_object_unpin, ++ .get_sg_table = drm_gem_shmem_object_get_sg_table, ++ .vmap = drm_gem_shmem_object_vmap, ++ .vunmap = drm_gem_shmem_object_vunmap, ++ .mmap = panthor_gem_mmap, ++ .export = panthor_gem_prime_export, ++ .vm_ops = &drm_gem_shmem_vm_ops, ++}; ++ ++/** ++ * panthor_gem_create_object - Implementation of driver->gem_create_object. ++ * @ddev: DRM device ++ * @size: Size in bytes of the memory the object will reference ++ * ++ * This lets the GEM helpers allocate object structs for us, and keep ++ * our BO stats correct. ++ */ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_gem_object *obj; ++ ++ obj = kzalloc(sizeof(*obj), GFP_KERNEL); ++ if (!obj) ++ return ERR_PTR(-ENOMEM); ++ ++ obj->base.base.funcs = &panthor_gem_funcs; ++ obj->base.map_wc = !ptdev->coherent; ++ mutex_init(&obj->gpuva_list_lock); ++ drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); ++ ++ return &obj->base.base; ++} ++ ++/** ++ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle. ++ * @file: DRM file. ++ * @ddev: DRM device. ++ * @exclusive_vm: Exclusive VM. 
Not NULL if the GEM object can't be shared. ++ * @size: Size of the GEM object to allocate. ++ * @flags: Combination of drm_panthor_bo_flags flags. ++ * @handle: Pointer holding the handle pointing to the new GEM object. ++ * ++ * Return: Zero on success ++ */ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, u32 *handle) ++{ ++ int ret; ++ struct drm_gem_shmem_object *shmem; ++ struct panthor_gem_object *bo; ++ ++ shmem = drm_gem_shmem_create(ddev, *size); ++ if (IS_ERR(shmem)) ++ return PTR_ERR(shmem); ++ ++ bo = to_panthor_bo(&shmem->base); ++ bo->flags = flags; ++ ++ if (exclusive_vm) { ++ bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); ++ drm_gem_object_get(bo->exclusive_vm_root_gem); ++ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; ++ } ++ ++ /* ++ * Allocate an id of idr table where the obj is registered ++ * and handle has the id what user can see. ++ */ ++ ret = drm_gem_handle_create(file, &shmem->base, handle); ++ if (!ret) ++ *size = bo->base.base.size; ++ ++ /* drop reference from allocate - handle holds it now. */ ++ drm_gem_object_put(&shmem->base); ++ ++ return ret; ++} +diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h +new file mode 100644 +index 000000000000..3bccba394d00 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_gem.h +@@ -0,0 +1,142 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_GEM_H__ ++#define __PANTHOR_GEM_H__ ++ ++#include ++#include ++ ++#include ++#include ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_gem_object - Driver specific GEM object. ++ */ ++struct panthor_gem_object { ++ /** @base: Inherit from drm_gem_shmem_object. */ ++ struct drm_gem_shmem_object base; ++ ++ /** ++ * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object ++ * is attached to. ++ * ++ * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a ++ * different VM will fail. ++ * ++ * All FW memory objects have this field set to the root GEM of the MCU ++ * VM. ++ */ ++ struct drm_gem_object *exclusive_vm_root_gem; ++ ++ /** ++ * @gpuva_list_lock: Custom GPUVA lock. ++ * ++ * Used to protect insertion of drm_gpuva elements to the ++ * drm_gem_object.gpuva.list list. ++ * ++ * We can't use the GEM resv for that, because drm_gpuva_link() is ++ * called in a dma-signaling path, where we're not allowed to take ++ * resv locks. ++ */ ++ struct mutex gpuva_list_lock; ++ ++ /** @flags: Combination of drm_panthor_bo_flags flags. */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_kernel_bo - Kernel buffer object. ++ * ++ * These objects are only manipulated by the kernel driver and not ++ * directly exposed to the userspace. The GPU address of a kernel ++ * BO might be passed to userspace though. ++ */ ++struct panthor_kernel_bo { ++ /** ++ * @obj: The GEM object backing this kernel buffer object. ++ */ ++ struct drm_gem_object *obj; ++ ++ /** ++ * @va_node: VA space allocated to this GEM. ++ */ ++ struct drm_mm_node va_node; ++ ++ /** ++ * @kmap: Kernel CPU mapping of @gem. 
++ */ ++ void *kmap; ++}; ++ ++static inline ++struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) ++{ ++ return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); ++} ++ ++struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); ++ ++struct drm_gem_object * ++panthor_gem_prime_import_sg_table(struct drm_device *ddev, ++ struct dma_buf_attachment *attach, ++ struct sg_table *sgt); ++ ++int ++panthor_gem_create_with_handle(struct drm_file *file, ++ struct drm_device *ddev, ++ struct panthor_vm *exclusive_vm, ++ u64 *size, u32 flags, uint32_t *handle); ++ ++static inline u64 ++panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) ++{ ++ return bo->va_node.start; ++} ++ ++static inline size_t ++panthor_kernel_bo_size(struct panthor_kernel_bo *bo) ++{ ++ return bo->obj->size; ++} ++ ++static inline int ++panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) ++{ ++ struct iosys_map map; ++ int ret; ++ ++ if (bo->kmap) ++ return 0; ++ ++ ret = drm_gem_vmap_unlocked(bo->obj, &map); ++ if (ret) ++ return ret; ++ ++ bo->kmap = map.vaddr; ++ return 0; ++} ++ ++static inline void ++panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) ++{ ++ if (bo->kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); ++ ++ drm_gem_vunmap_unlocked(bo->obj, &map); ++ bo->kmap = NULL; ++ } ++} ++ ++struct panthor_kernel_bo * ++panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, ++ size_t size, u32 bo_flags, u32 vm_map_flags, ++ u64 gpu_va); ++ ++void panthor_kernel_bo_destroy(struct panthor_vm *vm, ++ struct panthor_kernel_bo *bo); ++ ++#endif /* __PANTHOR_GEM_H__ */ +-- +2.42.0 + + +From 07aa7d9748282f568cd79b6d6e29c9f3a6997a6f Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:20 +0100 +Subject: [PATCH 08/71] [MERGED] drm/panthor: Add the devfreq logical block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Every thing related to devfreq in placed in panthor_devfreq.c, and +helpers that can be called by other logical blocks are exposed through +panthor_devfreq.h. + +This implementation is loosely based on the panfrost implementation, +the only difference being that we don't count device users, because +the idle/active state will be managed by the scheduler logic. 
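+
+Concretely, the only interface the other logical blocks need is the pair of
+record helpers exposed by panthor_devfreq.h. The sketch below shows where a
+scheduler would call them; the example_* wrappers are hypothetical and only
+illustrate the intended call sites:
+
+  #include "panthor_device.h"
+  #include "panthor_devfreq.h"
+
+  /* Hypothetical scheduler hook: a group starts running on the GPU. */
+  static void example_sched_gpu_busy(struct panthor_device *ptdev)
+  {
+          /* From now on, elapsed time is accounted as busy time. */
+          panthor_devfreq_record_busy(ptdev);
+  }
+
+  /* Hypothetical scheduler hook: the last group went idle. */
+  static void example_sched_gpu_idle(struct panthor_device *ptdev)
+  {
+          /* From now on, elapsed time is accounted as idle time. */
+          panthor_devfreq_record_idle(ptdev);
+  }
+
+The devfreq core then samples the resulting busy/idle ratio through the
+get_dev_status callback and lets the simple_ondemand governor pick an OPP.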
+ +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v4: +- Add Clément's A-b for the relicensing + +v3: +- Add acks for the MIT/GPL2 relicensing + +v2: +- Added in v2 + +Cc: Clément Péron # MIT+GPL2 relicensing +Reviewed-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Acked-by: Clément Péron # MIT+GPL2 relicensing +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-7-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_devfreq.h | 21 ++ + 2 files changed, 304 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.c + create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.h + +diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c +new file mode 100644 +index 000000000000..7ac4fa290f27 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.c +@@ -0,0 +1,283 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++ ++/** ++ * struct panthor_devfreq - Device frequency management ++ */ ++struct panthor_devfreq { ++ /** @devfreq: devfreq device. */ ++ struct devfreq *devfreq; ++ ++ /** @gov_data: Governor data. */ ++ struct devfreq_simple_ondemand_data gov_data; ++ ++ /** @busy_time: Busy time. */ ++ ktime_t busy_time; ++ ++ /** @idle_time: Idle time. */ ++ ktime_t idle_time; ++ ++ /** @time_last_update: Last update time. */ ++ ktime_t time_last_update; ++ ++ /** @last_busy_state: True if the GPU was busy last time we updated the state. */ ++ bool last_busy_state; ++ ++ /* ++ * @lock: Lock used to protect busy_time, idle_time, time_last_update and ++ * last_busy_state. ++ * ++ * These fields can be accessed concurrently by panthor_devfreq_get_dev_status() ++ * and panthor_devfreq_record_{busy,idle}(). 
++ */ ++ spinlock_t lock; ++}; ++ ++static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) ++{ ++ ktime_t now, last; ++ ++ now = ktime_get(); ++ last = pdevfreq->time_last_update; ++ ++ if (pdevfreq->last_busy_state) ++ pdevfreq->busy_time += ktime_sub(now, last); ++ else ++ pdevfreq->idle_time += ktime_sub(now, last); ++ ++ pdevfreq->time_last_update = now; ++} ++ ++static int panthor_devfreq_target(struct device *dev, unsigned long *freq, ++ u32 flags) ++{ ++ struct dev_pm_opp *opp; ++ ++ opp = devfreq_recommended_opp(dev, freq, flags); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ dev_pm_opp_put(opp); ++ ++ return dev_pm_opp_set_rate(dev, *freq); ++} ++ ++static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) ++{ ++ pdevfreq->busy_time = 0; ++ pdevfreq->idle_time = 0; ++ pdevfreq->time_last_update = ktime_get(); ++} ++ ++static int panthor_devfreq_get_dev_status(struct device *dev, ++ struct devfreq_dev_status *status) ++{ ++ struct panthor_device *ptdev = dev_get_drvdata(dev); ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ status->current_frequency = clk_get_rate(ptdev->clks.core); ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ ++ status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time, ++ pdevfreq->idle_time)); ++ ++ status->busy_time = ktime_to_ns(pdevfreq->busy_time); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++ ++ drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n", ++ status->busy_time, status->total_time, ++ status->busy_time / (status->total_time / 100), ++ status->current_frequency / 1000 / 1000); ++ ++ return 0; ++} ++ ++static struct devfreq_dev_profile panthor_devfreq_profile = { ++ .timer = DEVFREQ_TIMER_DELAYED, ++ .polling_ms = 50, /* ~3 frames */ ++ .target = panthor_devfreq_target, ++ .get_dev_status = panthor_devfreq_get_dev_status, ++}; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev) ++{ ++ /* There's actually 2 regulators (mali and sram), but the OPP core only ++ * supports one. ++ * ++ * We assume the sram regulator is coupled with the mali one and let ++ * the coupling logic deal with voltage updates. ++ */ ++ static const char * const reg_names[] = { "mali", NULL }; ++ struct thermal_cooling_device *cooling; ++ struct device *dev = ptdev->base.dev; ++ struct panthor_devfreq *pdevfreq; ++ struct dev_pm_opp *opp; ++ unsigned long cur_freq; ++ int ret; ++ ++ pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); ++ if (!pdevfreq) ++ return -ENOMEM; ++ ++ ptdev->devfreq = pdevfreq; ++ ++ ret = devm_pm_opp_set_regulators(dev, reg_names); ++ if (ret) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); ++ ++ return ret; ++ } ++ ++ ret = devm_pm_opp_of_add_table(dev); ++ if (ret) ++ return ret; ++ ++ spin_lock_init(&pdevfreq->lock); ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ cur_freq = clk_get_rate(ptdev->clks.core); ++ ++ opp = devfreq_recommended_opp(dev, &cur_freq, 0); ++ if (IS_ERR(opp)) ++ return PTR_ERR(opp); ++ ++ panthor_devfreq_profile.initial_freq = cur_freq; ++ ++ /* Regulator coupling only takes care of synchronizing/balancing voltage ++ * updates, but the coupled regulator needs to be enabled manually. 
++ * ++ * We use devm_regulator_get_enable_optional() and keep the sram supply ++ * enabled until the device is removed, just like we do for the mali ++ * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called, ++ * and disabled when the opp_table is torn down, using the devm action. ++ * ++ * If we really care about disabling regulators on suspend, we should: ++ * - use devm_regulator_get_optional() here ++ * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function ++ * (this disables the regulator passed to the OPP layer) ++ * - call dev_pm_opp_set_opp(dev, NULL) and ++ * regulator_disable(ptdev->regulators.sram) in ++ * panthor_devfreq_suspend() ++ * - call dev_pm_opp_set_opp(dev, default_opp) and ++ * regulator_enable(ptdev->regulators.sram) in ++ * panthor_devfreq_resume() ++ * ++ * But without knowing if it's beneficial or not (in term of power ++ * consumption), or how much it slows down the suspend/resume steps, ++ * let's just keep regulators enabled for the device lifetime. ++ */ ++ ret = devm_regulator_get_enable_optional(dev, "sram"); ++ if (ret && ret != -ENODEV) { ++ if (ret != -EPROBE_DEFER) ++ DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n"); ++ return ret; ++ } ++ ++ /* ++ * Set the recommend OPP this will enable and configure the regulator ++ * if any and will avoid a switch off by regulator_late_cleanup() ++ */ ++ ret = dev_pm_opp_set_opp(dev, opp); ++ if (ret) { ++ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); ++ return ret; ++ } ++ ++ dev_pm_opp_put(opp); ++ ++ /* ++ * Setup default thresholds for the simple_ondemand governor. ++ * The values are chosen based on experiments. ++ */ ++ pdevfreq->gov_data.upthreshold = 45; ++ pdevfreq->gov_data.downdifferential = 5; ++ ++ pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile, ++ DEVFREQ_GOV_SIMPLE_ONDEMAND, ++ &pdevfreq->gov_data); ++ if (IS_ERR(pdevfreq->devfreq)) { ++ DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); ++ ret = PTR_ERR(pdevfreq->devfreq); ++ pdevfreq->devfreq = NULL; ++ return ret; ++ } ++ ++ cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL); ++ if (IS_ERR(cooling)) ++ DRM_DEV_INFO(dev, "Failed to register cooling device\n"); ++ ++ return 0; ++} ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ panthor_devfreq_reset(pdevfreq); ++ ++ return devfreq_resume_device(pdevfreq->devfreq); ++} ++ ++int panthor_devfreq_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ ++ if (!pdevfreq->devfreq) ++ return 0; ++ ++ return devfreq_suspend_device(pdevfreq->devfreq); ++} ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = true; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} ++ ++void panthor_devfreq_record_idle(struct panthor_device *ptdev) ++{ ++ struct panthor_devfreq *pdevfreq = ptdev->devfreq; ++ unsigned long irqflags; ++ ++ if (!pdevfreq->devfreq) ++ return; ++ ++ spin_lock_irqsave(&pdevfreq->lock, irqflags); ++ ++ panthor_devfreq_update_utilization(pdevfreq); ++ pdevfreq->last_busy_state = false; ++ ++ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); ++} +diff --git 
a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h +new file mode 100644 +index 000000000000..83a5c9522493 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_devfreq.h +@@ -0,0 +1,21 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#ifndef __PANTHOR_DEVFREQ_H__ ++#define __PANTHOR_DEVFREQ_H__ ++ ++struct devfreq; ++struct thermal_cooling_device; ++ ++struct panthor_device; ++struct panthor_devfreq; ++ ++int panthor_devfreq_init(struct panthor_device *ptdev); ++ ++int panthor_devfreq_resume(struct panthor_device *ptdev); ++int panthor_devfreq_suspend(struct panthor_device *ptdev); ++ ++void panthor_devfreq_record_busy(struct panthor_device *ptdev); ++void panthor_devfreq_record_idle(struct panthor_device *ptdev); ++ ++#endif /* __PANTHOR_DEVFREQ_H__ */ +-- +2.42.0 + + +From 02a6ff1764dbf5370cd91a484fa01725a3badf99 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:21 +0100 +Subject: [PATCH 09/71] [MERGED] drm/panthor: Add the MMU/VM logical block + +MMU and VM management is related and placed in the same source file. + +Page table updates are delegated to the io-pgtable-arm driver that's in +the iommu subsystem. + +The VM management logic is based on drm_gpuva_mgr, and is assuming the +VA space is mostly managed by the usermode driver, except for a reserved +portion of this VA-space that's used for kernel objects (like the heap +contexts/chunks). + +Both asynchronous and synchronous VM operations are supported, and +internal helpers are exposed to allow other logical blocks to map their +buffers in the GPU VA space. + +There's one VM_BIND queue per-VM (meaning the Vulkan driver can only +expose one sparse-binding queue), and this bind queue is managed with +a 1:1 drm_sched_entity:drm_gpu_scheduler, such that each VM gets its own +independent execution queue, avoiding VM operation serialization at the +device level (things are still serialized at the VM level). + +The rest is just implementation details that are hopefully well explained +in the documentation. 
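As a rough, standalone illustration of the "reserved portion of the VA space for kernel objects" mentioned above (this sketch is not part of the patch; the 48-bit VA width and the default half/half split are assumptions chosen for the example), the arithmetic below mirrors what panthor_vm_create_check_args() does further down in panthor_mmu.c: carve the user range out of the full GPU VA space and keep the largest remaining power-of-two block for kernel-managed objects such as heap chunks.

/* Standalone userspace-style sketch of the user/kernel VA split.
 * Assumes a 48-bit GPU VA space and the default split; illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t rounddown_pow_of_two_u64(uint64_t x)
{
	/* Largest power of two <= x (x != 0). */
	return x ? 1ull << (63 - __builtin_clzll(x)) : 0;
}

int main(void)
{
	const unsigned int va_bits = 48;                  /* assumed MMU VA width */
	const uint64_t full_va_range = 1ull << va_bits;
	const uint64_t user_va_range = full_va_range / 2; /* default: half to the user */
	const uint64_t kernel_va_range =
		rounddown_pow_of_two_u64(full_va_range - user_va_range);
	const uint64_t kernel_va_start = full_va_range - kernel_va_range;

	printf("user VA range:   [0x%016llx, 0x%016llx)\n",
	       0ull, (unsigned long long)user_va_range);
	printf("kernel VA range: [0x%016llx, 0x%016llx)\n",
	       (unsigned long long)kernel_va_start,
	       (unsigned long long)full_va_range);
	return 0;
}

With these assumed values this yields a 2^47-byte user range starting at 0 and a 2^47-byte kernel range starting at 0x0000800000000000, which is the kind of clear power-of-two split the driver aims for.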
+ +v6: +- Add Maxime's and Heiko's acks +- Add Steve's R-b +- Adjust the TRANSCFG value to account for SW VA space limitation on + 32-bit systems +- Keep header inclusion alphabetically ordered + +v5: +- Fix a double panthor_vm_cleanup_op_ctx() call +- Fix a race between panthor_vm_prepare_map_op_ctx() and + panthor_vm_bo_put() +- Fix panthor_vm_pool_destroy_vm() kernel doc +- Fix paddr adjustment in panthor_vm_map_pages() +- Fix bo_offset calculation in panthor_vm_get_bo_for_va() + +v4: +- Add an helper to return the VM state +- Check drmm_mutex_init() return code +- Remove the VM from the AS reclaim list when panthor_vm_active() is + called +- Count the number of active VM users instead of considering there's + at most one user (several scheduling groups can point to the same + vM) +- Pre-allocate a VMA object for unmap operations (unmaps can trigger + a sm_step_remap() call) +- Check vm->root_page_table instead of vm->pgtbl_ops to detect if + the io-pgtable is trying to allocate the root page table +- Don't memset() the va_node in panthor_vm_alloc_va(), make it a + caller requirement +- Fix the kernel doc in a few places +- Drop the panthor_vm::base offset constraint and modify + panthor_vm_put() to explicitly check for a NULL value +- Fix unbalanced vm_bo refcount in panthor_gpuva_sm_step_remap() +- Drop stale comments about the shared_bos list +- Patch mmu_features::va_bits on 32-bit builds to reflect the + io_pgtable limitation and let the UMD know about it + +v3: +- Add acks for the MIT/GPL2 relicensing +- Propagate MMU faults to the scheduler +- Move pages pinning/unpinning out of the dma_signalling path +- Fix 32-bit support +- Rework the user/kernel VA range calculation +- Make the auto-VA range explicit (auto-VA range doesn't cover the full + kernel-VA range on the MCU VM) +- Let callers of panthor_vm_alloc_va() allocate the drm_mm_node + (embedded in panthor_kernel_bo now) +- Adjust things to match the latest drm_gpuvm changes (extobj tracking, + resv prep and more) +- Drop the per-AS lock and use slots_lock (fixes a race on vm->as.id) +- Set as.id to -1 when reusing an address space from the LRU list +- Drop misleading comment about page faults +- Remove check for irq being assigned in panthor_mmu_unplug() + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-8-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_mmu.c | 2768 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_mmu.h | 102 + + 2 files changed, 2870 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.c + create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.h + +diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c +new file mode 100644 +index 000000000000..fdd35249169f +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.c +@@ -0,0 +1,2768 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define MAX_AS_SLOTS 32 ++ ++struct panthor_vm; ++ ++/** ++ * struct panthor_as_slot - Address space slot ++ */ ++struct panthor_as_slot { ++ /** @vm: VM bound to this slot. NULL is no VM is bound. */ ++ struct panthor_vm *vm; ++}; ++ ++/** ++ * struct panthor_mmu - MMU related data ++ */ ++struct panthor_mmu { ++ /** @irq: The MMU irq. */ ++ struct panthor_irq irq; ++ ++ /** @as: Address space related fields. ++ * ++ * The GPU has a limited number of address spaces (AS) slots, forcing ++ * us to re-assign them to re-assign slots on-demand. ++ */ ++ struct { ++ /** @slots_lock: Lock protecting access to all other AS fields. */ ++ struct mutex slots_lock; ++ ++ /** @alloc_mask: Bitmask encoding the allocated slots. */ ++ unsigned long alloc_mask; ++ ++ /** @faulty_mask: Bitmask encoding the faulty slots. */ ++ unsigned long faulty_mask; ++ ++ /** @slots: VMs currently bound to the AS slots. */ ++ struct panthor_as_slot slots[MAX_AS_SLOTS]; ++ ++ /** ++ * @lru_list: List of least recently used VMs. ++ * ++ * We use this list to pick a VM to evict when all slots are ++ * used. ++ * ++ * There should be no more active VMs than there are AS slots, ++ * so this LRU is just here to keep VMs bound until there's ++ * a need to release a slot, thus avoid unnecessary TLB/cache ++ * flushes. ++ */ ++ struct list_head lru_list; ++ } as; ++ ++ /** @vm: VMs management fields */ ++ struct { ++ /** @lock: Lock protecting access to list. */ ++ struct mutex lock; ++ ++ /** @list: List containing all VMs. */ ++ struct list_head list; ++ ++ /** @reset_in_progress: True if a reset is in progress. */ ++ bool reset_in_progress; ++ ++ /** @wq: Workqueue used for the VM_BIND queues. */ ++ struct workqueue_struct *wq; ++ } vm; ++}; ++ ++/** ++ * struct panthor_vm_pool - VM pool object ++ */ ++struct panthor_vm_pool { ++ /** @xa: Array used for VM handle tracking. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_vma - GPU mapping object ++ * ++ * This is used to track GEM mappings in GPU space. ++ */ ++struct panthor_vma { ++ /** @base: Inherits from drm_gpuva. */ ++ struct drm_gpuva base; ++ ++ /** @node: Used to implement deferred release of VMAs. */ ++ struct list_head node; ++ ++ /** ++ * @flags: Combination of drm_panthor_vm_bind_op_flags. ++ * ++ * Only map related flags are accepted. ++ */ ++ u32 flags; ++}; ++ ++/** ++ * struct panthor_vm_op_ctx - VM operation context ++ * ++ * With VM operations potentially taking place in a dma-signaling path, we ++ * need to make sure everything that might require resource allocation is ++ * pre-allocated upfront. This is what this operation context is far. ++ * ++ * We also collect resources that have been freed, so we can release them ++ * asynchronously, and let the VM_BIND scheduler process the next VM_BIND ++ * request. ++ */ ++struct panthor_vm_op_ctx { ++ /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ ++ struct { ++ /** @count: Number of pages reserved. */ ++ u32 count; ++ ++ /** @ptr: Point to the first unused page in the @pages table. 
*/ ++ u32 ptr; ++ ++ /** ++ * @page: Array of pages that can be used for an MMU page table update. ++ * ++ * After an VM operation, there might be free pages left in this array. ++ * They should be returned to the pt_cache as part of the op_ctx cleanup. ++ */ ++ void **pages; ++ } rsvd_page_tables; ++ ++ /** ++ * @preallocated_vmas: Pre-allocated VMAs to handle the remap case. ++ * ++ * Partial unmap requests or map requests overlapping existing mappings will ++ * trigger a remap call, which need to register up to three panthor_vma objects ++ * (one for the new mapping, and two for the previous and next mappings). ++ */ ++ struct panthor_vma *preallocated_vmas[3]; ++ ++ /** @flags: Combination of drm_panthor_vm_bind_op_flags. */ ++ u32 flags; ++ ++ /** @va: Virtual range targeted by the VM operation. */ ++ struct { ++ /** @addr: Start address. */ ++ u64 addr; ++ ++ /** @range: Range size. */ ++ u64 range; ++ } va; ++ ++ /** ++ * @returned_vmas: List of panthor_vma objects returned after a VM operation. ++ * ++ * For unmap operations, this will contain all VMAs that were covered by the ++ * specified VA range. ++ * ++ * For map operations, this will contain all VMAs that previously mapped to ++ * the specified VA range. ++ * ++ * Those VMAs, and the resources they point to will be released as part of ++ * the op_ctx cleanup operation. ++ */ ++ struct list_head returned_vmas; ++ ++ /** @map: Fields specific to a map operation. */ ++ struct { ++ /** @vm_bo: Buffer object to map. */ ++ struct drm_gpuvm_bo *vm_bo; ++ ++ /** @bo_offset: Offset in the buffer object. */ ++ u64 bo_offset; ++ ++ /** ++ * @sgt: sg-table pointing to pages backing the GEM object. ++ * ++ * This is gathered at job creation time, such that we don't have ++ * to allocate in ::run_job(). ++ */ ++ struct sg_table *sgt; ++ ++ /** ++ * @new_vma: The new VMA object that will be inserted to the VA tree. ++ */ ++ struct panthor_vma *new_vma; ++ } map; ++}; ++ ++/** ++ * struct panthor_vm - VM object ++ * ++ * A VM is an object representing a GPU (or MCU) virtual address space. ++ * It embeds the MMU page table for this address space, a tree containing ++ * all the virtual mappings of GEM objects, and other things needed to manage ++ * the VM. ++ * ++ * Except for the MCU VM, which is managed by the kernel, all other VMs are ++ * created by userspace and mostly managed by userspace, using the ++ * %DRM_IOCTL_PANTHOR_VM_BIND ioctl. ++ * ++ * A portion of the virtual address space is reserved for kernel objects, ++ * like heap chunks, and userspace gets to decide how much of the virtual ++ * address space is left to the kernel (half of the virtual address space ++ * by default). ++ */ ++struct panthor_vm { ++ /** ++ * @base: Inherit from drm_gpuvm. ++ * ++ * We delegate all the VA management to the common drm_gpuvm framework ++ * and only implement hooks to update the MMU page table. ++ */ ++ struct drm_gpuvm base; ++ ++ /** ++ * @sched: Scheduler used for asynchronous VM_BIND request. ++ * ++ * We use a 1:1 scheduler here. ++ */ ++ struct drm_gpu_scheduler sched; ++ ++ /** ++ * @entity: Scheduling entity representing the VM_BIND queue. ++ * ++ * There's currently one bind queue per VM. It doesn't make sense to ++ * allow more given the VM operations are serialized anyway. ++ */ ++ struct drm_sched_entity entity; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @memattr: Value to program to the AS_MEMATTR register. */ ++ u64 memattr; ++ ++ /** @pgtbl_ops: Page table operations. 
*/ ++ struct io_pgtable_ops *pgtbl_ops; ++ ++ /** @root_page_table: Stores the root page table pointer. */ ++ void *root_page_table; ++ ++ /** ++ * @op_lock: Lock used to serialize operations on a VM. ++ * ++ * The serialization of jobs queued to the VM_BIND queue is already ++ * taken care of by drm_sched, but we need to serialize synchronous ++ * and asynchronous VM_BIND request. This is what this lock is for. ++ */ ++ struct mutex op_lock; ++ ++ /** ++ * @op_ctx: The context attached to the currently executing VM operation. ++ * ++ * NULL when no operation is in progress. ++ */ ++ struct panthor_vm_op_ctx *op_ctx; ++ ++ /** ++ * @mm: Memory management object representing the auto-VA/kernel-VA. ++ * ++ * Used to auto-allocate VA space for kernel-managed objects (tiler ++ * heaps, ...). ++ * ++ * For the MCU VM, this is managing the VA range that's used to map ++ * all shared interfaces. ++ * ++ * For user VMs, the range is specified by userspace, and must not ++ * exceed half of the VA space addressable. ++ */ ++ struct drm_mm mm; ++ ++ /** @mm_lock: Lock protecting the @mm field. */ ++ struct mutex mm_lock; ++ ++ /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ ++ struct { ++ /** @start: Start of the automatic VA-range for kernel BOs. */ ++ u64 start; ++ ++ /** @size: Size of the automatic VA-range for kernel BOs. */ ++ u64 end; ++ } kernel_auto_va; ++ ++ /** @as: Address space related fields. */ ++ struct { ++ /** ++ * @id: ID of the address space this VM is bound to. ++ * ++ * A value of -1 means the VM is inactive/not bound. ++ */ ++ int id; ++ ++ /** @active_cnt: Number of active users of this VM. */ ++ refcount_t active_cnt; ++ ++ /** ++ * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. ++ * ++ * Active VMs should not be inserted in the LRU list. ++ */ ++ struct list_head lru_node; ++ } as; ++ ++ /** ++ * @heaps: Tiler heap related fields. ++ */ ++ struct { ++ /** ++ * @pool: The heap pool attached to this VM. ++ * ++ * Will stay NULL until someone creates a heap context on this VM. ++ */ ++ struct panthor_heap_pool *pool; ++ ++ /** @lock: Lock used to protect access to @pool. */ ++ struct mutex lock; ++ } heaps; ++ ++ /** @node: Used to insert the VM in the panthor_mmu::vm::list. */ ++ struct list_head node; ++ ++ /** @for_mcu: True if this is the MCU VM. */ ++ bool for_mcu; ++ ++ /** ++ * @destroyed: True if the VM was destroyed. ++ * ++ * No further bind requests should be queued to a destroyed VM. ++ */ ++ bool destroyed; ++ ++ /** ++ * @unusable: True if the VM has turned unusable because something ++ * bad happened during an asynchronous request. ++ * ++ * We don't try to recover from such failures, because this implies ++ * informing userspace about the specific operation that failed, and ++ * hoping the userspace driver can replay things from there. This all ++ * sounds very complicated for little gain. ++ * ++ * Instead, we should just flag the VM as unusable, and fail any ++ * further request targeting this VM. ++ * ++ * We also provide a way to query a VM state, so userspace can destroy ++ * it and create a new one. ++ * ++ * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST ++ * situation, where the logical device needs to be re-created. ++ */ ++ bool unusable; ++ ++ /** ++ * @unhandled_fault: Unhandled fault happened. ++ * ++ * This should be reported to the scheduler, and the queue/group be ++ * flagged as faulty as a result. 
++ */ ++ bool unhandled_fault; ++}; ++ ++/** ++ * struct panthor_vm_bind_job - VM bind job ++ */ ++struct panthor_vm_bind_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */ ++ struct work_struct cleanup_op_ctx_work; ++ ++ /** @vm: VM targeted by the VM operation. */ ++ struct panthor_vm *vm; ++ ++ /** @ctx: Operation context. */ ++ struct panthor_vm_op_ctx ctx; ++}; ++ ++/** ++ * @pt_cache: Cache used to allocate MMU page tables. ++ * ++ * The pre-allocation pattern forces us to over-allocate to plan for ++ * the worst case scenario, and return the pages we didn't use. ++ * ++ * Having a kmem_cache allows us to speed allocations. ++ */ ++static struct kmem_cache *pt_cache; ++ ++/** ++ * alloc_pt() - Custom page table allocator ++ * @cookie: Cookie passed at page table allocation time. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ * @gfp: GFP flags. ++ * ++ * We want a custom allocator so we can use a cache for page table ++ * allocations and amortize the cost of the over-reservation that's ++ * done to allow asynchronous VM operations. ++ * ++ * Return: non-NULL on success, NULL if the allocation failed for any ++ * reason. ++ */ ++static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) ++{ ++ struct panthor_vm *vm = cookie; ++ void *page; ++ ++ /* Allocation of the root page table happening during init. */ ++ if (unlikely(!vm->root_page_table)) { ++ struct page *p; ++ ++ drm_WARN_ON(&vm->ptdev->base, vm->op_ctx); ++ p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev), ++ gfp | __GFP_ZERO, get_order(size)); ++ page = p ? page_address(p) : NULL; ++ vm->root_page_table = page; ++ return page; ++ } ++ ++ /* We're not supposed to have anything bigger than 4k here, because we picked a ++ * 4k granule size at init time. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return NULL; ++ ++ /* We must have some op_ctx attached to the VM and it must have at least one ++ * free page. ++ */ ++ if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) || ++ drm_WARN_ON(&vm->ptdev->base, ++ vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count)) ++ return NULL; ++ ++ page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++]; ++ memset(page, 0, SZ_4K); ++ ++ /* Page table entries don't use virtual addresses, which trips out ++ * kmemleak. kmemleak_alloc_phys() might work, but physical addresses ++ * are mixed with other fields, and I fear kmemleak won't detect that ++ * either. ++ * ++ * Let's just ignore memory passed to the page-table driver for now. ++ */ ++ kmemleak_ignore(page); ++ return page; ++} ++ ++/** ++ * @free_pt() - Custom page table free function ++ * @cookie: Cookie passed at page table allocation time. ++ * @data: Page table to free. ++ * @size: Size of the page table. This size should be fixed, ++ * and determined at creation time based on the granule size. ++ */ ++static void free_pt(void *cookie, void *data, size_t size) ++{ ++ struct panthor_vm *vm = cookie; ++ ++ if (unlikely(vm->root_page_table == data)) { ++ free_pages((unsigned long)data, get_order(size)); ++ vm->root_page_table = NULL; ++ return; ++ } ++ ++ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) ++ return; ++ ++ /* Return the page to the pt_cache. 
*/ ++ kmem_cache_free(pt_cache, data); ++} ++ ++static int wait_ready(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; ++ u32 val; ++ ++ /* Wait for the MMU status to indicate there is no active command, in ++ * case one is pending. ++ */ ++ ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr), ++ val, !(val & AS_STATUS_AS_ACTIVE), ++ 10, 100000); ++ ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n"); ++ } ++ ++ return ret; ++} ++ ++static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) ++{ ++ int status; ++ ++ /* write AS_COMMAND when MMU is ready to accept another command */ ++ status = wait_ready(ptdev, as_nr); ++ if (!status) ++ gpu_write(ptdev, AS_COMMAND(as_nr), cmd); ++ ++ return status; ++} ++ ++static void lock_region(struct panthor_device *ptdev, u32 as_nr, ++ u64 region_start, u64 size) ++{ ++ u8 region_width; ++ u64 region; ++ u64 region_end = region_start + size; ++ ++ if (!size) ++ return; ++ ++ /* ++ * The locked region is a naturally aligned power of 2 block encoded as ++ * log2 minus(1). ++ * Calculate the desired start/end and look for the highest bit which ++ * differs. The smallest naturally aligned block must include this bit ++ * change, the desired region starts with this bit (and subsequent bits) ++ * zeroed and ends with the bit (and subsequent bits) set to one. ++ */ ++ region_width = max(fls64(region_start ^ (region_end - 1)), ++ const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; ++ ++ /* ++ * Mask off the low bits of region_start (which would be ignored by ++ * the hardware anyway) ++ */ ++ region_start &= GENMASK_ULL(63, region_width); ++ ++ region = region_width | region_start; ++ ++ /* Lock the region that needs to be updated */ ++ gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); ++ gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); ++ write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); ++} ++ ++static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, ++ u64 iova, u64 size, u32 op) ++{ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (as_nr < 0) ++ return 0; ++ ++ if (op != AS_COMMAND_UNLOCK) ++ lock_region(ptdev, as_nr, iova, size); ++ ++ /* Run the MMU operation */ ++ write_cmd(ptdev, as_nr, op); ++ ++ /* Wait for the flush to complete */ ++ return wait_ready(ptdev, as_nr); ++} ++ ++static int mmu_hw_do_operation(struct panthor_vm *vm, ++ u64 iova, u64 size, u32 op) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ return ret; ++} ++ ++static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, ++ u64 transtab, u64 transcfg, u64 memattr) ++{ ++ int ret; ++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) ++{ ++ int ret; 
++ ++ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); ++ if (ret) ++ return ret; ++ ++ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0); ++ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0); ++ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0); ++ ++ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); ++ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0); ++ ++ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); ++} ++ ++static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) ++{ ++ /* Bits 16 to 31 mean REQ_COMPLETE. */ ++ return value & GENMASK(15, 0); ++} ++ ++static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as) ++{ ++ return BIT(as); ++} ++ ++/** ++ * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults ++ * @vm: VM to check. ++ * ++ * Return: true if the VM has unhandled faults, false otherwise. ++ */ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm) ++{ ++ return vm->unhandled_fault; ++} ++ ++/** ++ * panthor_vm_is_unusable() - Check if the VM is still usable ++ * @vm: VM to check. ++ * ++ * Return: true if the VM is unusable, false otherwise. ++ */ ++bool panthor_vm_is_unusable(struct panthor_vm *vm) ++{ ++ return vm->unusable; ++} ++ ++static void panthor_vm_release_as_locked(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ lockdep_assert_held(&ptdev->mmu->as.slots_lock); ++ ++ if (drm_WARN_ON(&ptdev->base, vm->as.id < 0)) ++ return; ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ refcount_set(&vm->as.active_cnt, 0); ++ list_del_init(&vm->as.lru_node); ++ vm->as.id = -1; ++} ++ ++/** ++ * panthor_vm_active() - Flag a VM as active ++ * @VM: VM to flag as active. ++ * ++ * Assigns an address space to a VM so it can be used by the GPU/MCU. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_active(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg; ++ int ret = 0, as, cookie; ++ u64 transtab, transcfg; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return -ENODEV; ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_dev_exit; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ if (refcount_inc_not_zero(&vm->as.active_cnt)) ++ goto out_unlock; ++ ++ as = vm->as.id; ++ if (as >= 0) { ++ /* Unhandled pagefault on this AS, the MMU was disabled. We need to ++ * re-enable the MMU after clearing+unmasking the AS interrupts. 
++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) ++ goto out_enable_as; ++ ++ goto out_make_active; ++ } ++ ++ /* Check for a free AS */ ++ if (vm->for_mcu) { ++ drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0)); ++ as = 0; ++ } else { ++ as = ffz(ptdev->mmu->as.alloc_mask | BIT(0)); ++ } ++ ++ if (!(BIT(as) & ptdev->gpu_info.as_present)) { ++ struct panthor_vm *lru_vm; ++ ++ lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list, ++ struct panthor_vm, ++ as.lru_node); ++ if (drm_WARN_ON(&ptdev->base, !lru_vm)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ ++ drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); ++ as = lru_vm->as.id; ++ panthor_vm_release_as_locked(lru_vm); ++ } ++ ++ /* Assign the free or reclaimed AS to the FD */ ++ vm->as.id = as; ++ set_bit(as, &ptdev->mmu->as.alloc_mask); ++ ptdev->mmu->as.slots[as].vm = vm; ++ ++out_enable_as: ++ transtab = cfg->arm_lpae_s1_cfg.ttbr; ++ transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | ++ AS_TRANSCFG_PTW_RA | ++ AS_TRANSCFG_ADRMODE_AARCH64_4K | ++ AS_TRANSCFG_INA_BITS(55 - va_bits); ++ if (ptdev->coherent) ++ transcfg |= AS_TRANSCFG_PTW_SH_OS; ++ ++ /* If the VM is re-activated, we clear the fault. */ ++ vm->unhandled_fault = false; ++ ++ /* Unhandled pagefault on this AS, clear the fault and re-enable interrupts ++ * before enabling the AS. ++ */ ++ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { ++ gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); ++ ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); ++ gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); ++ } ++ ++ ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); ++ ++out_make_active: ++ if (!ret) { ++ refcount_set(&vm->as.active_cnt, 1); ++ list_del_init(&vm->as.lru_node); ++ } ++ ++out_unlock: ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++/** ++ * panthor_vm_idle() - Flag a VM idle ++ * @VM: VM to flag as idle. ++ * ++ * When we know the GPU is done with the VM (no more jobs to process), ++ * we can relinquish the AS slot attached to this VM, if any. ++ * ++ * We don't release the slot immediately, but instead place the VM in ++ * the LRU list, so it can be evicted if another VM needs an AS slot. ++ * This way, VMs keep attached to the AS they were given until we run ++ * out of free slot, limiting the number of MMU operations (TLB flush ++ * and other AS updates). ++ */ ++void panthor_vm_idle(struct panthor_vm *vm) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock)) ++ return; ++ ++ if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node))) ++ list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list); ++ ++ refcount_set(&vm->as.active_cnt, 0); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_vm_stop(struct panthor_vm *vm) ++{ ++ drm_sched_stop(&vm->sched, NULL); ++} ++ ++static void panthor_vm_start(struct panthor_vm *vm) ++{ ++ drm_sched_start(&vm->sched, true); ++} ++ ++/** ++ * panthor_vm_as() - Get the AS slot attached to a VM ++ * @vm: VM to get the AS slot of. ++ * ++ * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise. 
++ */ ++int panthor_vm_as(struct panthor_vm *vm) ++{ ++ return vm->as.id; ++} ++ ++static size_t get_pgsize(u64 addr, size_t size, size_t *count) ++{ ++ /* ++ * io-pgtable only operates on multiple pages within a single table ++ * entry, so we need to split at boundaries of the table size, i.e. ++ * the next block size up. The distance from address A to the next ++ * boundary of block size B is logically B - A % B, but in unsigned ++ * two's complement where B is a power of two we get the equivalence ++ * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) ++ */ ++ size_t blk_offset = -addr % SZ_2M; ++ ++ if (blk_offset || size < SZ_2M) { ++ *count = min_not_zero(blk_offset, size) / SZ_4K; ++ return SZ_4K; ++ } ++ blk_offset = -addr % SZ_1G ?: SZ_1G; ++ *count = min(blk_offset, size) / SZ_2M; ++ return SZ_2M; ++} ++ ++static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ int ret = 0, cookie; ++ ++ if (vm->as.id < 0) ++ return 0; ++ ++ /* If the device is unplugged, we just silently skip the flush. */ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return 0; ++ ++ /* Flush the PTs only if we're already awake */ ++ if (pm_runtime_active(ptdev->base.dev)) ++ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 offset = 0; ++ ++ drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); ++ ++ while (offset < size) { ++ size_t unmapped_sz = 0, pgcount; ++ size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); ++ ++ unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); ++ ++ if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { ++ drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", ++ iova + offset + unmapped_sz, ++ iova + offset + pgsize * pgcount, ++ iova, iova + size); ++ panthor_vm_flush_range(vm, iova, offset + unmapped_sz); ++ return -EINVAL; ++ } ++ offset += unmapped_sz; ++ } ++ ++ return panthor_vm_flush_range(vm, iova, size); ++} ++ ++static int ++panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, ++ struct sg_table *sgt, u64 offset, u64 size) ++{ ++ struct panthor_device *ptdev = vm->ptdev; ++ unsigned int count; ++ struct scatterlist *sgl; ++ struct io_pgtable_ops *ops = vm->pgtbl_ops; ++ u64 start_iova = iova; ++ int ret; ++ ++ if (!size) ++ return 0; ++ ++ for_each_sgtable_dma_sg(sgt, sgl, count) { ++ dma_addr_t paddr = sg_dma_address(sgl); ++ size_t len = sg_dma_len(sgl); ++ ++ if (len <= offset) { ++ offset -= len; ++ continue; ++ } ++ ++ paddr += offset; ++ len -= offset; ++ len = min_t(size_t, len, size); ++ size -= len; ++ ++ drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", ++ vm->as.id, iova, &paddr, len); ++ ++ while (len) { ++ size_t pgcount, mapped = 0; ++ size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); ++ ++ ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, ++ GFP_KERNEL, &mapped); ++ iova += mapped; ++ paddr += mapped; ++ len -= mapped; ++ ++ if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) ++ ret = -ENOMEM; ++ ++ if (ret) { ++ /* If something failed, unmap what we've already mapped before ++ * returning. The unmap call is not supposed to fail. 
++ */ ++ drm_WARN_ON(&ptdev->base, ++ panthor_vm_unmap_pages(vm, start_iova, ++ iova - start_iova)); ++ return ret; ++ } ++ } ++ ++ if (!size) ++ break; ++ } ++ ++ return panthor_vm_flush_range(vm, start_iova, iova - start_iova); ++} ++ ++static int flags_to_prot(u32 flags) ++{ ++ int prot = 0; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC) ++ prot |= IOMMU_NOEXEC; ++ ++ if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)) ++ prot |= IOMMU_CACHE; ++ ++ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY) ++ prot |= IOMMU_READ; ++ else ++ prot |= IOMMU_READ | IOMMU_WRITE; ++ ++ return prot; ++} ++ ++/** ++ * panthor_vm_alloc_va() - Allocate a region in the auto-va space ++ * @VM: VM to allocate a region on. ++ * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user ++ * wants the VA to be automatically allocated from the auto-VA range. ++ * @size: size of the VA range. ++ * @va_node: drm_mm_node to initialize. Must be zero-initialized. ++ * ++ * Some GPU objects, like heap chunks, are fully managed by the kernel and ++ * need to be mapped to the userspace VM, in the region reserved for kernel ++ * objects. ++ * ++ * This function takes care of allocating a region in the kernel auto-VA space. ++ * ++ * Return: 0 on success, an error code otherwise. ++ */ ++int ++panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node) ++{ ++ int ret; ++ ++ if (!size || (size & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) ++ return -EINVAL; ++ ++ mutex_lock(&vm->mm_lock); ++ if (va != PANTHOR_VM_KERNEL_AUTO_VA) { ++ va_node->start = va; ++ va_node->size = size; ++ ret = drm_mm_reserve_node(&vm->mm, va_node); ++ } else { ++ ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size, ++ size >= SZ_2M ? SZ_2M : SZ_4K, ++ 0, vm->kernel_auto_va.start, ++ vm->kernel_auto_va.end, ++ DRM_MM_INSERT_BEST); ++ } ++ mutex_unlock(&vm->mm_lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() ++ * @VM: VM to free the region on. ++ * @va_node: Memory node representing the region to free. ++ */ ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) ++{ ++ mutex_lock(&vm->mm_lock); ++ drm_mm_remove_node(va_node); ++ mutex_unlock(&vm->mm_lock); ++} ++ ++static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); ++ struct drm_gpuvm *vm = vm_bo->vm; ++ bool unpin; ++ ++ /* We must retain the GEM before calling drm_gpuvm_bo_put(), ++ * otherwise the mutex might be destroyed while we hold it. ++ * Same goes for the VM, since we take the VM resv lock. ++ */ ++ drm_gem_object_get(&bo->base.base); ++ drm_gpuvm_get(vm); ++ ++ /* We take the resv lock to protect against concurrent accesses to the ++ * gpuvm evicted/extobj lists that are modified in ++ * drm_gpuvm_bo_destroy(), which is called if drm_gpuvm_bo_put() ++ * releases sthe last vm_bo reference. ++ * We take the BO GPUVA list lock to protect the vm_bo removal from the ++ * GEM vm_bo list. ++ */ ++ dma_resv_lock(drm_gpuvm_resv(vm), NULL); ++ mutex_lock(&bo->gpuva_list_lock); ++ unpin = drm_gpuvm_bo_put(vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ dma_resv_unlock(drm_gpuvm_resv(vm)); ++ ++ /* If the vm_bo object was destroyed, release the pin reference that ++ * was hold by this object. 
++ */ ++ if (unpin && !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ drm_gpuvm_put(vm); ++ drm_gem_object_put(&bo->base.base); ++} ++ ++static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ struct panthor_vma *vma, *tmp_vma; ++ ++ u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - ++ op_ctx->rsvd_page_tables.ptr; ++ ++ if (remaining_pt_count) { ++ kmem_cache_free_bulk(pt_cache, remaining_pt_count, ++ op_ctx->rsvd_page_tables.pages + ++ op_ctx->rsvd_page_tables.ptr); ++ } ++ ++ kfree(op_ctx->rsvd_page_tables.pages); ++ ++ if (op_ctx->map.vm_bo) ++ panthor_vm_bo_put(op_ctx->map.vm_bo); ++ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) ++ kfree(op_ctx->preallocated_vmas[i]); ++ ++ list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { ++ list_del(&vma->node); ++ panthor_vm_bo_put(vma->base.vm_bo); ++ kfree(vma); ++ } ++} ++ ++static struct panthor_vma * ++panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) ++{ ++ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { ++ struct panthor_vma *vma = op_ctx->preallocated_vmas[i]; ++ ++ if (vma) { ++ op_ctx->preallocated_vmas[i] = NULL; ++ return vma; ++ } ++ } ++ ++ return NULL; ++} ++ ++static int ++panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) ++{ ++ u32 vma_count; ++ ++ switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ /* One VMA for the new mapping, and two more VMAs for the remap case ++ * which might contain both a prev and next VA. ++ */ ++ vma_count = 3; ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ /* Partial unmaps might trigger a remap with either a prev or a next VA, ++ * but not both. ++ */ ++ vma_count = 1; ++ break; ++ ++ default: ++ return 0; ++ } ++ ++ for (u32 i = 0; i < vma_count; i++) { ++ struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); ++ ++ if (!vma) ++ return -ENOMEM; ++ ++ op_ctx->preallocated_vmas[i] = vma; ++ } ++ ++ return 0; ++} ++ ++#define PANTHOR_VM_BIND_OP_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \ ++ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ ++static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ struct panthor_gem_object *bo, ++ u64 offset, ++ u64 size, u64 va, ++ u32 flags) ++{ ++ struct drm_gpuvm_bo *preallocated_vm_bo; ++ struct sg_table *sgt = NULL; ++ u64 pt_count; ++ int ret; ++ ++ if (!bo) ++ return -EINVAL; ++ ++ if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) || ++ (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) ++ return -EINVAL; ++ ++ /* Make sure the VA and size are aligned and in-bounds. */ ++ if (size > bo->base.base.size || offset > bo->base.base.size - size) ++ return -EINVAL; ++ ++ /* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */ ++ if (bo->exclusive_vm_root_gem && ++ bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) ++ return -EINVAL; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = flags; ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (!bo->base.base.import_attach) { ++ /* Pre-reserve the BO pages, so the map operation doesn't have to ++ * allocate. 
++ */ ++ ret = drm_gem_shmem_pin(&bo->base); ++ if (ret) ++ goto err_cleanup; ++ } ++ ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = PTR_ERR(sgt); ++ goto err_cleanup; ++ } ++ ++ op_ctx->map.sgt = sgt; ++ ++ preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); ++ if (!preallocated_vm_bo) { ++ if (!bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* If the a vm_bo for this combination exists, it already ++ * retains a pin ref, and we can release the one we took earlier. ++ * ++ * If our pre-allocated vm_bo is picked, it now retains the pin ref, ++ * which will be released in panthor_vm_bo_put(). ++ */ ++ if (preallocated_vm_bo != op_ctx->map.vm_bo && ++ !bo->base.base.import_attach) ++ drm_gem_shmem_unpin(&bo->base); ++ ++ op_ctx->map.bo_offset = offset; ++ ++ /* L1, L2 and L3 page tables. ++ * We could optimize L3 allocation by iterating over the sgt and merging ++ * 2M contiguous blocks, but it's simpler to over-provision and return ++ * the pages if they're not used. ++ */ ++ pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) + ++ ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) + ++ ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21); ++ ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ op_ctx->rsvd_page_tables.count = ret; ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ ++ /* Insert BO into the extobj list last, when we know nothing can fail. */ ++ dma_resv_lock(panthor_vm_resv(vm), NULL); ++ drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo); ++ dma_resv_unlock(panthor_vm_resv(vm)); ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm, ++ u64 va, u64 size) ++{ ++ u32 pt_count = 0; ++ int ret; ++ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->va.range = size; ++ op_ctx->va.addr = va; ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; ++ ++ /* Pre-allocate L3 page tables to account for the split-2M-block ++ * situation on unmap. 
++ */ ++ if (va != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ if (va + size != ALIGN(va + size, SZ_2M) && ++ ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M)) ++ pt_count++; ++ ++ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); ++ if (ret) ++ goto err_cleanup; ++ ++ if (pt_count) { ++ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, ++ sizeof(*op_ctx->rsvd_page_tables.pages), ++ GFP_KERNEL); ++ if (!op_ctx->rsvd_page_tables.pages) ++ goto err_cleanup; ++ ++ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, ++ op_ctx->rsvd_page_tables.pages); ++ if (ret != pt_count) { ++ ret = -ENOMEM; ++ goto err_cleanup; ++ } ++ op_ctx->rsvd_page_tables.count = pt_count; ++ } ++ ++ return 0; ++ ++err_cleanup: ++ panthor_vm_cleanup_op_ctx(op_ctx, vm); ++ return ret; ++} ++ ++static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx, ++ struct panthor_vm *vm) ++{ ++ memset(op_ctx, 0, sizeof(*op_ctx)); ++ INIT_LIST_HEAD(&op_ctx->returned_vmas); ++ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; ++} ++ ++/** ++ * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address ++ * @vm: VM to look into. ++ * @va: Virtual address to search for. ++ * @bo_offset: Offset of the GEM object mapped at this virtual address. ++ * Only valid on success. ++ * ++ * The object returned by this function might no longer be mapped when the ++ * function returns. It's the caller responsibility to ensure there's no ++ * concurrent map/unmap operations making the returned value invalid, or ++ * make sure it doesn't matter if the object is no longer mapped. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset) ++{ ++ struct panthor_gem_object *bo = ERR_PTR(-ENOENT); ++ struct drm_gpuva *gpuva; ++ struct panthor_vma *vma; ++ ++ /* Take the VM lock to prevent concurrent map/unmap operations. */ ++ mutex_lock(&vm->op_lock); ++ gpuva = drm_gpuva_find_first(&vm->base, va, 1); ++ vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL; ++ if (vma && vma->base.gem.obj) { ++ drm_gem_object_get(vma->base.gem.obj); ++ bo = to_panthor_bo(vma->base.gem.obj); ++ *bo_offset = vma->base.gem.offset + (va - vma->base.va.addr); ++ } ++ mutex_unlock(&vm->op_lock); ++ ++ return bo; ++} ++ ++#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M ++ ++static u64 ++panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args, ++ u64 full_va_range) ++{ ++ u64 user_va_range; ++ ++ /* Make sure we have a minimum amount of VA space for kernel objects. */ ++ if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE) ++ return 0; ++ ++ if (args->user_va_range) { ++ /* Use the user provided value if != 0. */ ++ user_va_range = args->user_va_range; ++ } else if (TASK_SIZE_OF(current) < full_va_range) { ++ /* If the task VM size is smaller than the GPU VA range, pick this ++ * as our default user VA range, so userspace can CPU/GPU map buffers ++ * at the same address. ++ */ ++ user_va_range = TASK_SIZE_OF(current); ++ } else { ++ /* If the GPU VA range is smaller than the task VM size, we ++ * just have to live with the fact we won't be able to map ++ * all buffers at the same GPU/CPU address. ++ * ++ * If the GPU VA range is bigger than 4G (more than 32-bit of ++ * VA), we split the range in two, and assign half of it to ++ * the user and the other half to the kernel, if it's not, we ++ * keep the kernel VA space as small as possible. ++ */ ++ user_va_range = full_va_range > SZ_4G ? 
++ full_va_range / 2 : ++ full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ } ++ ++ if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range) ++ user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; ++ ++ return user_va_range; ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int ++panthor_vm_create_check_args(const struct panthor_device *ptdev, ++ const struct drm_panthor_vm_create *args, ++ u64 *kernel_va_start, u64 *kernel_va_range) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ u64 user_va_range; ++ ++ if (args->flags & ~PANTHOR_VM_CREATE_FLAGS) ++ return -EINVAL; ++ ++ user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range); ++ if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range)) ++ return -EINVAL; ++ ++ /* Pick a kernel VA range that's a power of two, to have a clear split. */ ++ *kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range); ++ *kernel_va_start = full_va_range - *kernel_va_range; ++ return 0; ++} ++ ++/* ++ * Only 32 VMs per open file. If that becomes a limiting factor, we can ++ * increase this number. ++ */ ++#define PANTHOR_MAX_VMS_PER_FILE 32 ++ ++/** ++ * panthor_vm_pool_create_vm() - Create a VM ++ * @pool: The VM to create this VM on. ++ * @kernel_va_start: Start of the region reserved for kernel objects. ++ * @kernel_va_range: Size of the region reserved for kernel objects. ++ * ++ * Return: a positive VM ID on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args) ++{ ++ u64 kernel_va_start, kernel_va_range; ++ struct panthor_vm *vm; ++ int ret; ++ u32 id; ++ ++ ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range); ++ if (ret) ++ return ret; ++ ++ vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range, ++ kernel_va_start, kernel_va_range); ++ if (IS_ERR(vm)) ++ return PTR_ERR(vm); ++ ++ ret = xa_alloc(&pool->xa, &id, vm, ++ XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL); ++ ++ if (ret) { ++ panthor_vm_put(vm); ++ return ret; ++ } ++ ++ args->user_va_range = kernel_va_start; ++ return id; ++} ++ ++static void panthor_vm_destroy(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return; ++ ++ vm->destroyed = true; ++ ++ mutex_lock(&vm->heaps.lock); ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ vm->heaps.pool = NULL; ++ mutex_unlock(&vm->heaps.lock); ++ ++ drm_WARN_ON(&vm->ptdev->base, ++ panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); ++ panthor_vm_put(vm); ++} ++ ++/** ++ * panthor_vm_pool_destroy_vm() - Destroy a VM. ++ * @pool: VM pool. ++ * @handle: VM handle. ++ * ++ * This function doesn't free the VM object or its resources, it just kills ++ * all mappings, and makes sure nothing can be mapped after that point. ++ * ++ * If there was any active jobs at the time this function is called, these ++ * jobs should experience page faults and be killed as a result. ++ * ++ * The VM resources are freed when the last reference on the VM object is ++ * dropped. ++ */ ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = xa_erase(&pool->xa, handle); ++ ++ panthor_vm_destroy(vm); ++ ++ return vm ? 0 : -EINVAL; ++} ++ ++/** ++ * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle ++ * @pool: VM pool to check. ++ * @handle: Handle of the VM to retrieve. 
++ * ++ * Return: A valid pointer if the VM exists, NULL otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) ++{ ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_get(xa_load(&pool->xa, handle)); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_pool_destroy() - Destroy a VM pool. ++ * @pfile: File. ++ * ++ * Destroy all VMs in the pool, and release the pool resources. ++ * ++ * Note that VMs can outlive the pool they were created from if other ++ * objects hold a reference to there VMs. ++ */ ++void panthor_vm_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_vm *vm; ++ unsigned long i; ++ ++ if (!pfile->vms) ++ return; ++ ++ xa_for_each(&pfile->vms->xa, i, vm) ++ panthor_vm_destroy(vm); ++ ++ xa_destroy(&pfile->vms->xa); ++ kfree(pfile->vms); ++} ++ ++/** ++ * panthor_vm_pool_create() - Create a VM pool ++ * @pfile: File. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_pool_create(struct panthor_file *pfile) ++{ ++ pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); ++ if (!pfile->vms) ++ return -ENOMEM; ++ ++ xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1); ++ return 0; ++} ++ ++/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ ++static void mmu_tlb_flush_all(void *cookie) ++{ ++} ++ ++static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) ++{ ++} ++ ++static const struct iommu_flush_ops mmu_tlb_ops = { ++ .tlb_flush_all = mmu_tlb_flush_all, ++ .tlb_flush_walk = mmu_tlb_flush_walk, ++}; ++ ++static const char *access_type_name(struct panthor_device *ptdev, ++ u32 fault_status) ++{ ++ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { ++ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: ++ return "ATOMIC"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_READ: ++ return "READ"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: ++ return "WRITE"; ++ case AS_FAULTSTATUS_ACCESS_TYPE_EX: ++ return "EXECUTE"; ++ default: ++ drm_WARN_ON(&ptdev->base, 1); ++ return NULL; ++ } ++} ++ ++static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ bool has_unhandled_faults = false; ++ ++ status = panthor_mmu_fault_mask(ptdev, status); ++ while (status) { ++ u32 as = ffs(status | (status >> 16)) - 1; ++ u32 mask = panthor_mmu_as_fault_mask(ptdev, as); ++ u32 new_int_mask; ++ u64 addr; ++ u32 fault_status; ++ u32 exception_type; ++ u32 access_type; ++ u32 source_id; ++ ++ fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); ++ addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as)); ++ addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32; ++ ++ /* decode the fault status */ ++ exception_type = fault_status & 0xFF; ++ access_type = (fault_status >> 8) & 0x3; ++ source_id = (fault_status >> 16); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ ptdev->mmu->as.faulty_mask |= mask; ++ new_int_mask = ++ panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask); ++ ++ /* terminal fault, print info about the fault */ ++ drm_err(&ptdev->base, ++ "Unhandled Page fault in AS%d at VA 0x%016llX\n" ++ "raw fault status: 0x%X\n" ++ "decoded fault status: %s\n" ++ "exception type 0x%X: %s\n" ++ "access type 0x%X: %s\n" ++ "source id 0x%X\n", ++ as, addr, ++ fault_status, ++ (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), ++ exception_type, panthor_exception_name(ptdev, exception_type), ++ access_type, access_type_name(ptdev, fault_status), ++ source_id); ++ ++ /* Ignore MMU interrupts on this AS until it's been ++ * re-enabled. 
++ */ ++ ptdev->mmu->irq.mask = new_int_mask; ++ gpu_write(ptdev, MMU_INT_MASK, new_int_mask); ++ ++ if (ptdev->mmu->as.slots[as].vm) ++ ptdev->mmu->as.slots[as].vm->unhandled_fault = true; ++ ++ /* Disable the MMU to kill jobs on this AS. */ ++ panthor_mmu_as_disable(ptdev, as); ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ status &= ~mask; ++ has_unhandled_faults = true; ++ } ++ ++ if (has_unhandled_faults) ++ panthor_sched_report_mmu_fault(ptdev); ++} ++PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); ++ ++/** ++ * panthor_mmu_suspend() - Suspend the MMU logic ++ * @ptdev: Device. ++ * ++ * All we do here is de-assign the AS slots on all active VMs, so things ++ * get flushed to the main memory, and no further access to these VMs are ++ * possible. ++ * ++ * We also suspend the MMU IRQ. ++ */ ++void panthor_mmu_suspend(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++} ++ ++/** ++ * panthor_mmu_resume() - Resume the MMU logic ++ * @ptdev: Device. ++ * ++ * Resume the IRQ. ++ * ++ * We don't re-enable previously active VMs. We assume other parts of the ++ * driver will call panthor_vm_active() on the VMs they intend to use. ++ */ ++void panthor_mmu_resume(struct panthor_device *ptdev) ++{ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++} ++ ++/** ++ * panthor_mmu_pre_reset() - Prepare for a reset ++ * @ptdev: Device. ++ * ++ * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we ++ * don't get asked to do a VM operation while the GPU is down. ++ * ++ * We don't cleanly shutdown the AS slots here, because the reset might ++ * come from an AS_ACTIVE_BIT stuck situation. ++ */ ++void panthor_mmu_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ ptdev->mmu->vm.reset_in_progress = true; ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++/** ++ * panthor_mmu_post_reset() - Restore things after a reset ++ * @ptdev: Device. ++ * ++ * Put the MMU logic back in action after a reset. That implies resuming the ++ * IRQ and re-enabling the VM_BIND queues. ++ */ ++void panthor_mmu_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_vm *vm; ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ ++ /* Now that the reset is effective, we can assume that none of the ++ * AS slots are setup, and clear the faulty flags too. ++ */ ++ ptdev->mmu->as.alloc_mask = 0; ++ ptdev->mmu->as.faulty_mask = 0; ++ ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) ++ panthor_vm_release_as_locked(vm); ++ } ++ ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); ++ ++ /* Restart the VM_BIND queues. 
*/ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ panthor_vm_start(vm); ++ } ++ ptdev->mmu->vm.reset_in_progress = false; ++ mutex_unlock(&ptdev->mmu->vm.lock); ++} ++ ++static void panthor_vm_free(struct drm_gpuvm *gpuvm) ++{ ++ struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); ++ struct panthor_device *ptdev = vm->ptdev; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) ++ panthor_heap_pool_destroy(vm->heaps.pool); ++ mutex_unlock(&vm->heaps.lock); ++ mutex_destroy(&vm->heaps.lock); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_del(&vm->node); ++ /* Restore the scheduler state so we can call drm_sched_entity_destroy() ++ * and drm_sched_fini(). If get there, that means we have no job left ++ * and no new jobs can be queued, so we can start the scheduler without ++ * risking interfering with the reset. ++ */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_start(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ drm_sched_entity_destroy(&vm->entity); ++ drm_sched_fini(&vm->sched); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ if (vm->as.id >= 0) { ++ int cookie; ++ ++ if (drm_dev_enter(&ptdev->base, &cookie)) { ++ panthor_mmu_as_disable(ptdev, vm->as.id); ++ drm_dev_exit(cookie); ++ } ++ ++ ptdev->mmu->as.slots[vm->as.id].vm = NULL; ++ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); ++ list_del(&vm->as.lru_node); ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++ ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++ drm_mm_takedown(&vm->mm); ++ kfree(vm); ++} ++ ++/** ++ * panthor_vm_put() - Release a reference on a VM ++ * @vm: VM to release the reference on. Can be NULL. ++ */ ++void panthor_vm_put(struct panthor_vm *vm) ++{ ++ drm_gpuvm_put(vm ? &vm->base : NULL); ++} ++ ++/** ++ * panthor_vm_get() - Get a VM reference ++ * @vm: VM to get the reference on. Can be NULL. ++ * ++ * Return: @vm value. ++ */ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) ++{ ++ if (vm) ++ drm_gpuvm_get(&vm->base); ++ ++ return vm; ++} ++ ++/** ++ * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM ++ * @vm: VM to query the heap pool on. ++ * @create: True if the heap pool should be created when it doesn't exist. ++ * ++ * Heap pools are per-VM. This function allows one to retrieve the heap pool ++ * attached to a VM. ++ * ++ * If no heap pool exists yet, and @create is true, we create one. ++ * ++ * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) ++{ ++ struct panthor_heap_pool *pool; ++ ++ mutex_lock(&vm->heaps.lock); ++ if (!vm->heaps.pool && create) { ++ if (vm->destroyed) ++ pool = ERR_PTR(-EINVAL); ++ else ++ pool = panthor_heap_pool_create(vm->ptdev, vm); ++ ++ if (!IS_ERR(pool)) ++ vm->heaps.pool = panthor_heap_pool_get(pool); ++ } else { ++ pool = panthor_heap_pool_get(vm->heaps.pool); ++ } ++ mutex_unlock(&vm->heaps.lock); ++ ++ return pool; ++} ++ ++static u64 mair_to_memattr(u64 mair) ++{ ++ u64 memattr = 0; ++ u32 i; ++ ++ for (i = 0; i < 8; i++) { ++ u8 in_attr = mair >> (8 * i), out_attr; ++ u8 outer = in_attr >> 4, inner = in_attr & 0xf; ++ ++ /* For caching to be enabled, inner and outer caching policy ++ * have to be both write-back, if one of them is write-through ++ * or non-cacheable, we just choose non-cacheable. 
Device ++ * memory is also translated to non-cacheable. ++ */ ++ if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | ++ AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); ++ } else { ++ /* Use SH_CPU_INNER mode so SH_IS, which is used when ++ * IOMMU_CACHE is set, actually maps to the standard ++ * definition of inner-shareable and not Mali's ++ * internal-shareable mode. ++ */ ++ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | ++ AS_MEMATTR_AARCH64_SH_CPU_INNER | ++ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); ++ } ++ ++ memattr |= (u64)out_attr << (8 * i); ++ } ++ ++ return memattr; ++} ++ ++static void panthor_vma_link(struct panthor_vm *vm, ++ struct panthor_vma *vma, ++ struct drm_gpuvm_bo *vm_bo) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_link(&vma->base, vm_bo); ++ drm_WARN_ON(&vm->ptdev->base, drm_gpuvm_bo_put(vm_bo)); ++ mutex_unlock(&bo->gpuva_list_lock); ++} ++ ++static void panthor_vma_unlink(struct panthor_vm *vm, ++ struct panthor_vma *vma) ++{ ++ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); ++ struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); ++ ++ mutex_lock(&bo->gpuva_list_lock); ++ drm_gpuva_unlink(&vma->base); ++ mutex_unlock(&bo->gpuva_list_lock); ++ ++ /* drm_gpuva_unlink() release the vm_bo, but we manually retained it ++ * when entering this function, so we can implement deferred VMA ++ * destruction. Re-assign it here. ++ */ ++ vma->base.vm_bo = vm_bo; ++ list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); ++} ++ ++static void panthor_vma_init(struct panthor_vma *vma, u32 flags) ++{ ++ INIT_LIST_HEAD(&vma->node); ++ vma->flags = flags; ++} ++ ++#define PANTHOR_VM_MAP_FLAGS \ ++ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) ++ ++static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) ++{ ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ int ret; ++ ++ if (!vma) ++ return -EINVAL; ++ ++ panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS); ++ ++ ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), ++ op_ctx->map.sgt, op->map.gem.offset, ++ op->map.va.range); ++ if (ret) ++ return ret; ++ ++ /* Ref owned by the mapping now, clear the obj field so we don't release the ++ * pinning/obj ref behind GPUVA's back. 
++ */ ++ drm_gpuva_map(&vm->base, &vma->base, &op->map); ++ panthor_vma_link(vm, vma, op_ctx->map.vm_bo); ++ op_ctx->map.vm_bo = NULL; ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; ++ struct panthor_vma *prev_vma = NULL, *next_vma = NULL; ++ u64 unmap_start, unmap_range; ++ int ret; ++ ++ drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); ++ ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); ++ if (ret) ++ return ret; ++ ++ if (op->remap.prev) { ++ prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(prev_vma, unmap_vma->flags); ++ } ++ ++ if (op->remap.next) { ++ next_vma = panthor_vm_op_ctx_get_vma(op_ctx); ++ panthor_vma_init(next_vma, unmap_vma->flags); ++ } ++ ++ drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL, ++ next_vma ? &next_vma->base : NULL, ++ &op->remap); ++ ++ if (prev_vma) { ++ /* panthor_vma_link() transfers the vm_bo ownership to ++ * the VMA object. Since the vm_bo we're passing is still ++ * owned by the old mapping which will be released when this ++ * mapping is destroyed, we need to grab a ref here. ++ */ ++ panthor_vma_link(vm, prev_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ if (next_vma) { ++ panthor_vma_link(vm, next_vma, ++ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); ++ } ++ ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, ++ void *priv) ++{ ++ struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); ++ struct panthor_vm *vm = priv; ++ int ret; ++ ++ ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, ++ unmap_vma->base.va.range); ++ if (drm_WARN_ON(&vm->ptdev->base, ret)) ++ return ret; ++ ++ drm_gpuva_unmap(&op->unmap); ++ panthor_vma_unlink(vm, unmap_vma); ++ return 0; ++} ++ ++static const struct drm_gpuvm_ops panthor_gpuvm_ops = { ++ .vm_free = panthor_vm_free, ++ .sm_step_map = panthor_gpuva_sm_step_map, ++ .sm_step_remap = panthor_gpuva_sm_step_remap, ++ .sm_step_unmap = panthor_gpuva_sm_step_unmap, ++}; ++ ++/** ++ * panthor_vm_resv() - Get the dma_resv object attached to a VM. ++ * @vm: VM to get the dma_resv of. ++ * ++ * Return: A dma_resv object. 
++ */ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) ++{ ++ return drm_gpuvm_resv(&vm->base); ++} ++ ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) ++{ ++ if (!vm) ++ return NULL; ++ ++ return vm->base.r_obj; ++} ++ ++static int ++panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, ++ bool flag_vm_unusable_on_failure) ++{ ++ u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; ++ int ret; ++ ++ if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) ++ return 0; ++ ++ mutex_lock(&vm->op_lock); ++ vm->op_ctx = op; ++ switch (op_type) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ if (vm->unusable) { ++ ret = -EINVAL; ++ break; ++ } ++ ++ ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, ++ op->map.vm_bo->obj, op->map.bo_offset); ++ break; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range); ++ break; ++ ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ if (ret && flag_vm_unusable_on_failure) ++ vm->unusable = true; ++ ++ vm->op_ctx = NULL; ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static struct dma_fence * ++panthor_vm_bind_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); ++ bool cookie; ++ int ret; ++ ++ /* Not only we report an error whose result is propagated to the ++ * drm_sched finished fence, but we also flag the VM as unusable, because ++ * a failure in the async VM_BIND results in an inconsistent state. VM needs ++ * to be destroyed and recreated. ++ */ ++ cookie = dma_fence_begin_signalling(); ++ ret = panthor_vm_exec_op(job->vm, &job->ctx, true); ++ dma_fence_end_signalling(cookie); ++ ++ return ret ? ERR_PTR(ret) : NULL; ++} ++ ++static void panthor_vm_bind_job_release(struct kref *kref) ++{ ++ struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); ++ panthor_vm_put(job->vm); ++ kfree(job); ++} ++ ++/** ++ * panthor_vm_bind_job_put() - Release a VM_BIND job reference ++ * @sched_job: Job to release the reference on. ++ */ ++void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, panthor_vm_bind_job_release); ++} ++ ++static void ++panthor_vm_bind_free_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ drm_sched_job_cleanup(sched_job); ++ ++ /* Do the heavy cleanups asynchronously, so we're out of the ++ * dma-signaling path and can acquire dma-resv locks safely. ++ */ ++ queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work); ++} ++ ++static enum drm_gpu_sched_stat ++panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) ++{ ++ WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static const struct drm_sched_backend_ops panthor_vm_bind_ops = { ++ .run_job = panthor_vm_bind_run_job, ++ .free_job = panthor_vm_bind_free_job, ++ .timedout_job = panthor_vm_bind_timedout_job, ++}; ++ ++/** ++ * panthor_vm_create() - Create a VM ++ * @ptdev: Device. ++ * @for_mcu: True if this is the FW MCU VM. 
++ * @kernel_va_start: Start of the range reserved for kernel BO mapping. ++ * @kernel_va_size: Size of the range reserved for kernel BO mapping. ++ * @auto_kernel_va_start: Start of the auto-VA kernel range. ++ * @auto_kernel_va_size: Size of the auto-VA kernel range. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct panthor_vm * ++panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 auto_kernel_va_start, u64 auto_kernel_va_size) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); ++ u64 full_va_range = 1ull << va_bits; ++ struct drm_gem_object *dummy_gem; ++ struct drm_gpu_scheduler *sched; ++ struct io_pgtable_cfg pgtbl_cfg; ++ u64 mair, min_va, va_range; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = kzalloc(sizeof(*vm), GFP_KERNEL); ++ if (!vm) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We allocate a dummy GEM for the VM. */ ++ dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base); ++ if (!dummy_gem) { ++ ret = -ENOMEM; ++ goto err_free_vm; ++ } ++ ++ mutex_init(&vm->heaps.lock); ++ vm->for_mcu = for_mcu; ++ vm->ptdev = ptdev; ++ mutex_init(&vm->op_lock); ++ ++ if (for_mcu) { ++ /* CSF MCU is a cortex M7, and can only address 4G */ ++ min_va = 0; ++ va_range = SZ_4G; ++ } else { ++ min_va = 0; ++ va_range = full_va_range; ++ } ++ ++ mutex_init(&vm->mm_lock); ++ drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size); ++ vm->kernel_auto_va.start = auto_kernel_va_start; ++ vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1; ++ ++ INIT_LIST_HEAD(&vm->node); ++ INIT_LIST_HEAD(&vm->as.lru_node); ++ vm->as.id = -1; ++ refcount_set(&vm->as.active_cnt, 0); ++ ++ pgtbl_cfg = (struct io_pgtable_cfg) { ++ .pgsize_bitmap = SZ_4K | SZ_2M, ++ .ias = va_bits, ++ .oas = pa_bits, ++ .coherent_walk = ptdev->coherent, ++ .tlb = &mmu_tlb_ops, ++ .iommu_dev = ptdev->base.dev, ++ .alloc = alloc_pt, ++ .free = free_pt, ++ }; ++ ++ vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm); ++ if (!vm->pgtbl_ops) { ++ ret = -EINVAL; ++ goto err_mm_takedown; ++ } ++ ++ /* Bind operations are synchronous for now, no timeout needed. */ ++ ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, ++ 1, 1, 0, ++ MAX_SCHEDULE_TIMEOUT, NULL, NULL, ++ "panthor-vm-bind", ptdev->base.dev); ++ if (ret) ++ goto err_free_io_pgtable; ++ ++ sched = &vm->sched; ++ ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL); ++ if (ret) ++ goto err_sched_fini; ++ ++ mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; ++ vm->memattr = mair_to_memattr(mair); ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_add_tail(&vm->node, &ptdev->mmu->vm.list); ++ ++ /* If a reset is in progress, stop the scheduler. */ ++ if (ptdev->mmu->vm.reset_in_progress) ++ panthor_vm_stop(vm); ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ /* We intentionally leave the reserved range to zero, because we want kernel VMAs ++ * to be handled the same way user VMAs are. ++ */ ++ drm_gpuvm_init(&vm->base, for_mcu ? 
"panthor-MCU-VM" : "panthor-GPU-VM", ++ DRM_GPUVM_RESV_PROTECTED, &ptdev->base, dummy_gem, ++ min_va, va_range, 0, 0, &panthor_gpuvm_ops); ++ drm_gem_object_put(dummy_gem); ++ return vm; ++ ++err_sched_fini: ++ drm_sched_fini(&vm->sched); ++ ++err_free_io_pgtable: ++ free_io_pgtable_ops(vm->pgtbl_ops); ++ ++err_mm_takedown: ++ drm_mm_takedown(&vm->mm); ++ drm_gem_object_put(dummy_gem); ++ ++err_free_vm: ++ kfree(vm); ++ return ERR_PTR(ret); ++} ++ ++static int ++panthor_vm_bind_prepare_op_ctx(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op, ++ struct panthor_vm_op_ctx *op_ctx) ++{ ++ struct drm_gem_object *gem; ++ int ret; ++ ++ /* Aligned on page size. */ ++ if ((op->va | op->size) & ~PAGE_MASK) ++ return -EINVAL; ++ ++ switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: ++ gem = drm_gem_object_lookup(file, op->bo_handle); ++ ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm, ++ gem ? to_panthor_bo(gem) : NULL, ++ op->bo_offset, ++ op->size, ++ op->va, ++ op->flags); ++ drm_gem_object_put(gem); ++ return ret; ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size); ++ ++ case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: ++ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) ++ return -EINVAL; ++ ++ if (op->bo_handle || op->bo_offset) ++ return -EINVAL; ++ ++ if (op->va || op->size) ++ return -EINVAL; ++ ++ if (!op->syncs.count) ++ return -EINVAL; ++ ++ panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) ++{ ++ struct panthor_vm_bind_job *job = ++ container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); ++ ++ panthor_vm_bind_job_put(&job->base); ++} ++ ++/** ++ * panthor_vm_bind_job_create() - Create a VM_BIND job ++ * @file: File. ++ * @vm: VM targeted by the VM_BIND job. ++ * @op: VM operation data. ++ * ++ * Return: A valid pointer on success, an ERR_PTR() otherwise. ++ */ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op) ++{ ++ struct panthor_vm_bind_job *job; ++ int ret; ++ ++ if (!vm) ++ return ERR_PTR(-EINVAL); ++ ++ if (vm->destroyed || vm->unusable) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx); ++ if (ret) { ++ kfree(job); ++ return ERR_PTR(ret); ++ } ++ ++ INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); ++ kref_init(&job->refcount); ++ job->vm = panthor_vm_get(vm); ++ ++ ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_vm_bind_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs ++ * @exec: The locking/preparation context. ++ * @sched_job: The job to prepare resvs on. ++ * ++ * Locks and prepare the VM resv. ++ * ++ * If this is a map operation, locks and prepares the GEM resv. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, ++ struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); ++ int ret; ++ ++ /* Acquire the VM lock an reserve a slot for this VM bind job. */ ++ ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1); ++ if (ret) ++ return ret; ++ ++ if (job->ctx.map.vm_bo) { ++ /* Lock/prepare the GEM being mapped. */ ++ ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job ++ * @exec: drm_exec context. ++ * @sched_job: Job to update the resvs on. ++ */ ++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, ++ struct drm_sched_job *sched_job) ++{ ++ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); ++ ++ /* Explicit sync => we just register our job finished fence as bookkeep. */ ++ drm_gpuvm_resv_add_fence(&job->vm->base, exec, ++ &sched_job->s_fence->finished, ++ DMA_RESV_USAGE_BOOKKEEP, ++ DMA_RESV_USAGE_BOOKKEEP); ++} ++ ++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage) ++{ ++ drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage); ++} ++ ++/** ++ * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously. ++ * @file: File. ++ * @vm: VM targeted by the VM operation. ++ * @op: Data describing the VM operation. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_bind_exec_sync_op(struct drm_file *file, ++ struct panthor_vm *vm, ++ struct drm_panthor_vm_bind_op *op) ++{ ++ struct panthor_vm_op_ctx op_ctx; ++ int ret; ++ ++ /* No sync objects allowed on synchronous operations. */ ++ if (op->syncs.count) ++ return -EINVAL; ++ ++ if (!op->size) ++ return 0; ++ ++ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx); ++ if (ret) ++ return ret; ++ ++ ret = panthor_vm_exec_op(vm, &op_ctx, false); ++ panthor_vm_cleanup_op_ctx(&op_ctx, vm); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_map_bo_range() - Map a GEM object range to a VM ++ * @vm: VM to map the GEM to. ++ * @bo: GEM object to map. ++ * @offset: Offset in the GEM object. ++ * @size: Size to map. ++ * @va: Virtual address to map the object to. ++ * @flags: Combination of drm_panthor_vm_bind_op_flags flags. ++ * Only map-related flags are valid. ++ * ++ * Internal use only. For userspace requests, use ++ * panthor_vm_bind_exec_sync_op() instead. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, ++ u64 offset, u64 size, u64 va, u32 flags) ++{ ++ struct panthor_vm_op_ctx op_ctx; ++ int ret; ++ ++ ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags); ++ if (ret) ++ return ret; ++ ++ ret = panthor_vm_exec_op(vm, &op_ctx, false); ++ panthor_vm_cleanup_op_ctx(&op_ctx, vm); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_unmap_range() - Unmap a portion of the VA space ++ * @vm: VM to unmap the region from. ++ * @va: Virtual address to unmap. Must be 4k aligned. ++ * @size: Size of the region to unmap. Must be 4k aligned. ++ * ++ * Internal use only. For userspace requests, use ++ * panthor_vm_bind_exec_sync_op() instead. 
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) ++{ ++ struct panthor_vm_op_ctx op_ctx; ++ int ret; ++ ++ ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size); ++ if (ret) ++ return ret; ++ ++ ret = panthor_vm_exec_op(vm, &op_ctx, false); ++ panthor_vm_cleanup_op_ctx(&op_ctx, vm); ++ ++ return ret; ++} ++ ++/** ++ * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. ++ * @exec: Locking/preparation context. ++ * @vm: VM targeted by the GPU job. ++ * @slot_count: Number of slots to reserve. ++ * ++ * GPU jobs assume all BOs bound to the VM at the time the job is submitted ++ * are available when the job is executed. In order to guarantee that, we ++ * need to reserve a slot on all BOs mapped to a VM and update this slot with ++ * the job fence after its submission. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, ++ u32 slot_count) ++{ ++ int ret; ++ ++ /* Acquire the VM lock and reserve a slot for this GPU job. */ ++ ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count); ++ if (ret) ++ return ret; ++ ++ return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count); ++} ++ ++/** ++ * panthor_mmu_unplug() - Unplug the MMU logic ++ * @ptdev: Device. ++ * ++ * No access to the MMU regs should be done after this function is called. ++ * We suspend the IRQ and disable all VMs to guarantee that. ++ */ ++void panthor_mmu_unplug(struct panthor_device *ptdev) ++{ ++ panthor_mmu_irq_suspend(&ptdev->mmu->irq); ++ ++ mutex_lock(&ptdev->mmu->as.slots_lock); ++ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { ++ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; ++ ++ if (vm) { ++ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); ++ panthor_vm_release_as_locked(vm); ++ } ++ } ++ mutex_unlock(&ptdev->mmu->as.slots_lock); ++} ++ ++static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) ++{ ++ destroy_workqueue(res); ++} ++ ++/** ++ * panthor_mmu_init() - Initialize the MMU logic. ++ * @ptdev: Device. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_init(struct panthor_device *ptdev) ++{ ++ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); ++ struct panthor_mmu *mmu; ++ int ret, irq; ++ ++ mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL); ++ if (!mmu) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&mmu->as.lru_list); ++ ++ ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&mmu->vm.list); ++ ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); ++ if (ret) ++ return ret; ++ ++ ptdev->mmu = mmu; ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq, ++ panthor_mmu_fault_mask(ptdev, ~0)); ++ if (ret) ++ return ret; ++ ++ mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0); ++ if (!mmu->vm.wq) ++ return -ENOMEM; ++ ++ /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, ++ * which passes iova as an unsigned long. Patch the mmu_features to reflect this ++ * limitation. 
++ */ ++ if (sizeof(unsigned long) * 8 < va_bits) { ++ ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); ++ ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; ++ } ++ ++ return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); ++} ++ ++#ifdef CONFIG_DEBUG_FS ++static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) ++{ ++ int ret; ++ ++ mutex_lock(&vm->op_lock); ++ ret = drm_debugfs_gpuva_info(m, &vm->base); ++ mutex_unlock(&vm->op_lock); ++ ++ return ret; ++} ++ ++static int show_each_vm(struct seq_file *m, void *arg) ++{ ++ struct drm_info_node *node = (struct drm_info_node *)m->private; ++ struct drm_device *ddev = node->minor->dev; ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ mutex_lock(&ptdev->mmu->vm.lock); ++ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { ++ ret = show(vm, m); ++ if (ret < 0) ++ break; ++ ++ seq_puts(m, "\n"); ++ } ++ mutex_unlock(&ptdev->mmu->vm.lock); ++ ++ return ret; ++} ++ ++static struct drm_info_list panthor_mmu_debugfs_list[] = { ++ DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), ++}; ++ ++/** ++ * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries ++ * @minor: Minor. ++ */ ++void panthor_mmu_debugfs_init(struct drm_minor *minor) ++{ ++ drm_debugfs_create_files(panthor_mmu_debugfs_list, ++ ARRAY_SIZE(panthor_mmu_debugfs_list), ++ minor->debugfs_root, minor); ++} ++#endif /* CONFIG_DEBUG_FS */ ++ ++/** ++ * panthor_mmu_pt_cache_init() - Initialize the page table cache. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_mmu_pt_cache_init(void) ++{ ++ pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL); ++ if (!pt_cache) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * panthor_mmu_pt_cache_fini() - Destroy the page table cache. ++ */ ++void panthor_mmu_pt_cache_fini(void) ++{ ++ kmem_cache_destroy(pt_cache); ++} +diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h +new file mode 100644 +index 000000000000..f3c1ed19f973 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_mmu.h +@@ -0,0 +1,102 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2019 Linaro, Ltd, Rob Herring */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#ifndef __PANTHOR_MMU_H__ ++#define __PANTHOR_MMU_H__ ++ ++#include ++ ++struct drm_exec; ++struct drm_sched_job; ++struct panthor_gem_object; ++struct panthor_heap_pool; ++struct panthor_vm; ++struct panthor_vma; ++struct panthor_mmu; ++ ++int panthor_mmu_init(struct panthor_device *ptdev); ++void panthor_mmu_unplug(struct panthor_device *ptdev); ++void panthor_mmu_pre_reset(struct panthor_device *ptdev); ++void panthor_mmu_post_reset(struct panthor_device *ptdev); ++void panthor_mmu_suspend(struct panthor_device *ptdev); ++void panthor_mmu_resume(struct panthor_device *ptdev); ++ ++int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, ++ u64 offset, u64 size, u64 va, u32 flags); ++int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size); ++struct panthor_gem_object * ++panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); ++ ++int panthor_vm_active(struct panthor_vm *vm); ++void panthor_vm_idle(struct panthor_vm *vm); ++int panthor_vm_as(struct panthor_vm *vm); ++ ++struct panthor_heap_pool * ++panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); ++ ++struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); ++void panthor_vm_put(struct panthor_vm *vm); ++struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, ++ u64 kernel_va_start, u64 kernel_va_size, ++ u64 kernel_auto_va_start, ++ u64 kernel_auto_va_size); ++ ++int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, ++ struct panthor_vm *vm, ++ u32 slot_count); ++int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm, ++ struct drm_sched_job *job); ++ ++struct dma_resv *panthor_vm_resv(struct panthor_vm *vm); ++struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm); ++ ++void panthor_vm_pool_destroy(struct panthor_file *pfile); ++int panthor_vm_pool_create(struct panthor_file *pfile); ++int panthor_vm_pool_create_vm(struct panthor_device *ptdev, ++ struct panthor_vm_pool *pool, ++ struct drm_panthor_vm_create *args); ++int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle); ++struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle); ++ ++bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm); ++bool panthor_vm_is_unusable(struct panthor_vm *vm); ++ ++/* ++ * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM ++ * logic to auto-allocate the virtual address in the reserved kernel VA range. 
++ */ ++#define PANTHOR_VM_KERNEL_AUTO_VA ~0ull ++ ++int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, ++ struct drm_mm_node *va_node); ++void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node); ++ ++int panthor_vm_bind_exec_sync_op(struct drm_file *file, ++ struct panthor_vm *vm, ++ struct drm_panthor_vm_bind_op *op); ++ ++struct drm_sched_job * ++panthor_vm_bind_job_create(struct drm_file *file, ++ struct panthor_vm *vm, ++ const struct drm_panthor_vm_bind_op *op); ++void panthor_vm_bind_job_put(struct drm_sched_job *job); ++int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, ++ struct drm_sched_job *job); ++void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, ++ struct dma_fence *fence, ++ enum dma_resv_usage private_usage, ++ enum dma_resv_usage extobj_usage); ++ ++int panthor_mmu_pt_cache_init(void); ++void panthor_mmu_pt_cache_fini(void); ++ ++#ifdef CONFIG_DEBUG_FS ++void panthor_mmu_debugfs_init(struct drm_minor *minor); ++#endif ++ ++#endif +-- +2.42.0 + + +From c1d00b19c2fd1b30e05f7a683e057b37935d3701 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:22 +0100 +Subject: [PATCH 10/71] [MERGED] drm/panthor: Add the FW logical block + +Contains everything that's FW related, that includes the code dealing +with the microcontroller unit (MCU) that's running the FW, and anything +related to allocating memory shared between the FW and the CPU. + +A few global FW events are processed in the IRQ handler, the rest is +forwarded to the scheduler, since scheduling is the primary reason for +the FW existence, and also the main source of FW <-> kernel +interactions. + +v6: +- Add Maxime's and Heiko's acks +- Keep header inclusion alphabetically ordered + +v5: +- Fix typo in GLB_PERFCNT_SAMPLE definition +- Fix unbalanced panthor_vm_idle/active() calls +- Fallback to a slow reset when the fast reset fails +- Add extra information when reporting a FW boot failure + +v4: +- Add a MODULE_FIRMWARE() entry for gen 10.8 +- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry() +- Fix typos +- Add Steve's R-b + +v3: +- Make the FW path more future-proof (Liviu) +- Use one waitqueue for all FW events +- Simplify propagation of FW events to the scheduler logic +- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead +- Account for the panthor_vm changes +- Replace magic number with 0x7fffffff with ~0 to better signify that + it's the maximum permitted value. +- More accurate rounding when computing the firmware timeout. +- Add a 'sub iterator' helper function. This also adds a check that a + firmware entry doesn't overflow the firmware image. +- Drop __packed from FW structures, natural alignment is good enough. +- Other minor code improvements. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-9-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_fw.c | 1362 ++++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_fw.h | 503 ++++++++++ + 2 files changed, 1865 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c + create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h + +diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c +new file mode 100644 +index 000000000000..33c87a59834e +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.c +@@ -0,0 +1,1362 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifdef CONFIG_ARM_ARCH_TIMER ++#include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++#define CSF_FW_NAME "mali_csffw.bin" ++ ++#define PING_INTERVAL_MS 12000 ++#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) ++#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 ++#define IDLE_HYSTERESIS_US 800 ++#define PWROFF_HYSTERESIS_US 10000 ++ ++/** ++ * struct panthor_fw_binary_hdr - Firmware binary header. ++ */ ++struct panthor_fw_binary_hdr { ++ /** @magic: Magic value to check binary validity. */ ++ u32 magic; ++#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e ++ ++ /** @minor: Minor FW version. */ ++ u8 minor; ++ ++ /** @major: Major FW version. */ ++ u8 major; ++#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 ++ ++ /** @padding1: MBZ. */ ++ u16 padding1; ++ ++ /** @version_hash: FW version hash. */ ++ u32 version_hash; ++ ++ /** @padding2: MBZ. */ ++ u32 padding2; ++ ++ /** @size: FW binary size. */ ++ u32 size; ++}; ++ ++/** ++ * enum panthor_fw_binary_entry_type - Firmware binary entry type ++ */ ++enum panthor_fw_binary_entry_type { ++ /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ ++ CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ ++ CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ ++ CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, ++ ++ /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. 
*/ ++ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, ++}; ++ ++#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) ++#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff) ++#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) ++#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) ++ ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30) ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31) ++ ++#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \ ++ (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \ ++ CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) ++ ++/** ++ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary ++ */ ++struct panthor_fw_binary_section_entry_hdr { ++ /** @flags: Section flags. */ ++ u32 flags; ++ ++ /** @va: MCU virtual range to map this binary section to. */ ++ struct { ++ /** @start: Start address. */ ++ u32 start; ++ ++ /** @end: End address. */ ++ u32 end; ++ } va; ++ ++ /** @data: Data to initialize the FW section with. */ ++ struct { ++ /** @start: Start offset in the FW binary. */ ++ u32 start; ++ ++ /** @end: End offset in the FW binary. */ ++ u32 end; ++ } data; ++}; ++ ++/** ++ * struct panthor_fw_binary_iter - Firmware binary iterator ++ * ++ * Used to parse a firmware binary. ++ */ ++struct panthor_fw_binary_iter { ++ /** @data: FW binary data. */ ++ const void *data; ++ ++ /** @size: FW binary size. */ ++ size_t size; ++ ++ /** @offset: Iterator offset. */ ++ size_t offset; ++}; ++ ++/** ++ * struct panthor_fw_section - FW section ++ */ ++struct panthor_fw_section { ++ /** @node: Used to keep track of FW sections. */ ++ struct list_head node; ++ ++ /** @flags: Section flags, as encoded in the FW binary. */ ++ u32 flags; ++ ++ /** @mem: Section memory. */ ++ struct panthor_kernel_bo *mem; ++ ++ /** ++ * @name: Name of the section, as specified in the binary. ++ * ++ * Can be NULL. ++ */ ++ const char *name; ++ ++ /** ++ * @data: Initial data copied to the FW memory. ++ * ++ * We keep data around so we can reload sections after a reset. ++ */ ++ struct { ++ /** @buf: Buffed used to store init data. */ ++ const void *buf; ++ ++ /** @size: Size of @buf in bytes. 
*/ ++ size_t size; ++ } data; ++}; ++ ++#define CSF_MCU_SHARED_REGION_START 0x04000000ULL ++#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL ++ ++#define MIN_CS_PER_CSG 8 ++#define MIN_CSGS 3 ++#define MAX_CSG_PRIO 0xf ++ ++#define CSF_IFACE_VERSION(major, minor, patch) \ ++ (((major) << 24) | ((minor) << 16) | (patch)) ++#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24) ++#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff) ++#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff) ++ ++#define CSF_GROUP_CONTROL_OFFSET 0x1000 ++#define CSF_STREAM_CONTROL_OFFSET 0x40 ++#define CSF_UNPRESERVED_REG_COUNT 4 ++ ++/** ++ * struct panthor_fw_iface - FW interfaces ++ */ ++struct panthor_fw_iface { ++ /** @global: Global interface. */ ++ struct panthor_fw_global_iface global; ++ ++ /** @groups: Group slot interfaces. */ ++ struct panthor_fw_csg_iface groups[MAX_CSGS]; ++ ++ /** @streams: Command stream slot interfaces. */ ++ struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG]; ++}; ++ ++/** ++ * struct panthor_fw - Firmware management ++ */ ++struct panthor_fw { ++ /** @vm: MCU VM. */ ++ struct panthor_vm *vm; ++ ++ /** @sections: List of FW sections. */ ++ struct list_head sections; ++ ++ /** @shared_section: The section containing the FW interfaces. */ ++ struct panthor_fw_section *shared_section; ++ ++ /** @iface: FW interfaces. */ ++ struct panthor_fw_iface iface; ++ ++ /** @watchdog: Collection of fields relating to the FW watchdog. */ ++ struct { ++ /** @ping_work: Delayed work used to ping the FW. */ ++ struct delayed_work ping_work; ++ } watchdog; ++ ++ /** ++ * @req_waitqueue: FW request waitqueue. ++ * ++ * Everytime a request is sent to a command stream group or the global ++ * interface, the caller will first busy wait for the request to be ++ * acknowledged, and then fallback to a sleeping wait. ++ * ++ * This wait queue is here to support the sleeping wait flavor. ++ */ ++ wait_queue_head_t req_waitqueue; ++ ++ /** @booted: True is the FW is booted */ ++ bool booted; ++ ++ /** ++ * @fast_reset: True if the post_reset logic can proceed with a fast reset. ++ * ++ * A fast reset is just a reset where the driver doesn't reload the FW sections. ++ * ++ * Any time the firmware is properly suspended, a fast reset can take place. ++ * On the other hand, if the halt operation failed, the driver will reload ++ * all sections to make sure we start from a fresh state. ++ */ ++ bool fast_reset; ++ ++ /** @irq: Job irq data. */ ++ struct panthor_irq irq; ++}; ++ ++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev) ++{ ++ return ptdev->fw->vm; ++} ++ ++/** ++ * panthor_fw_get_glb_iface() - Get the global interface ++ * @ptdev: Device. ++ * ++ * Return: The global interface. ++ */ ++struct panthor_fw_global_iface * ++panthor_fw_get_glb_iface(struct panthor_device *ptdev) ++{ ++ return &ptdev->fw->iface.global; ++} ++ ++/** ++ * panthor_fw_get_csg_iface() - Get a command stream group slot interface ++ * @ptdev: Device. ++ * @csg_slot: Index of the command stream group slot. ++ * ++ * Return: The command stream group slot interface. ++ */ ++struct panthor_fw_csg_iface * ++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot) ++{ ++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS)) ++ return NULL; ++ ++ return &ptdev->fw->iface.groups[csg_slot]; ++} ++ ++/** ++ * panthor_fw_get_cs_iface() - Get a command stream slot interface ++ * @ptdev: Device. ++ * @csg_slot: Index of the command stream group slot. ++ * @cs_slot: Index of the command stream slot. 
++ * ++ * Return: The command stream slot interface. ++ */ ++struct panthor_fw_cs_iface * ++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) ++{ ++ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG)) ++ return NULL; ++ ++ return &ptdev->fw->iface.streams[csg_slot][cs_slot]; ++} ++ ++/** ++ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count ++ * @ptdev: Device. ++ * @timeout_us: Timeout expressed in micro-seconds. ++ * ++ * The FW has two timer sources: the GPU counter or arch-timer. We need ++ * to express timeouts in term of number of cycles and specify which ++ * timer source should be used. ++ * ++ * Return: A value suitable for timeout fields in the global interface. ++ */ ++static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us) ++{ ++ bool use_cycle_counter = false; ++ u32 timer_rate = 0; ++ u64 mod_cycles; ++ ++#ifdef CONFIG_ARM_ARCH_TIMER ++ timer_rate = arch_timer_get_cntfrq(); ++#endif ++ ++ if (!timer_rate) { ++ use_cycle_counter = true; ++ timer_rate = clk_get_rate(ptdev->clks.core); ++ } ++ ++ if (drm_WARN_ON(&ptdev->base, !timer_rate)) { ++ /* We couldn't get a valid clock rate, let's just pick the ++ * maximum value so the FW still handles the core ++ * power on/off requests. ++ */ ++ return GLB_TIMER_VAL(~0) | ++ GLB_TIMER_SOURCE_GPU_COUNTER; ++ } ++ ++ mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate, ++ 1000000ull << 10); ++ if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0))) ++ mod_cycles = GLB_TIMER_VAL(~0); ++ ++ return GLB_TIMER_VAL(mod_cycles) | ++ (use_cycle_counter ? GLB_TIMER_SOURCE_GPU_COUNTER : 0); ++} ++ ++static int panthor_fw_binary_iter_read(struct panthor_device *ptdev, ++ struct panthor_fw_binary_iter *iter, ++ void *out, size_t size) ++{ ++ size_t new_offset = iter->offset + size; ++ ++ if (new_offset > iter->size || new_offset < iter->offset) { ++ drm_err(&ptdev->base, "Firmware too small\n"); ++ return -EINVAL; ++ } ++ ++ memcpy(out, iter->data + iter->offset, size); ++ iter->offset = new_offset; ++ return 0; ++} ++ ++static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev, ++ struct panthor_fw_binary_iter *iter, ++ struct panthor_fw_binary_iter *sub_iter, ++ size_t size) ++{ ++ size_t new_offset = iter->offset + size; ++ ++ if (new_offset > iter->size || new_offset < iter->offset) { ++ drm_err(&ptdev->base, "Firmware entry too long\n"); ++ return -EINVAL; ++ } ++ ++ sub_iter->offset = 0; ++ sub_iter->data = iter->data + iter->offset; ++ sub_iter->size = size; ++ iter->offset = new_offset; ++ return 0; ++} ++ ++static void panthor_fw_init_section_mem(struct panthor_device *ptdev, ++ struct panthor_fw_section *section) ++{ ++ bool was_mapped = !!section->mem->kmap; ++ int ret; ++ ++ if (!section->data.size && ++ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)) ++ return; ++ ++ ret = panthor_kernel_bo_vmap(section->mem); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ return; ++ ++ memcpy(section->mem->kmap, section->data.buf, section->data.size); ++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) { ++ memset(section->mem->kmap + section->data.size, 0, ++ panthor_kernel_bo_size(section->mem) - section->data.size); ++ } ++ ++ if (!was_mapped) ++ panthor_kernel_bo_vunmap(section->mem); ++} ++ ++/** ++ * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces. ++ * @ptdev: Device. ++ * @input: Pointer holding the input interface on success. ++ * Should be ignored on failure. 
++ * @output: Pointer holding the output interface on success. ++ * Should be ignored on failure. ++ * @input_fw_va: Pointer holding the input interface FW VA on success. ++ * Should be ignored on failure. ++ * @output_fw_va: Pointer holding the output interface FW VA on success. ++ * Should be ignored on failure. ++ * ++ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input ++ * interface is at offset 0, and the output interface at offset 4096. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va) ++{ ++ struct panthor_kernel_bo *mem; ++ int ret; ++ ++ mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(mem)) ++ return mem; ++ ++ ret = panthor_kernel_bo_vmap(mem); ++ if (ret) { ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); ++ return ERR_PTR(ret); ++ } ++ ++ memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); ++ *input = mem->kmap; ++ *output = mem->kmap + SZ_4K; ++ *input_fw_va = panthor_kernel_bo_gpuva(mem); ++ *output_fw_va = *input_fw_va + SZ_4K; ++ ++ return mem; ++} ++ ++/** ++ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. ++ * @ptdev: Device. ++ * @size: Size of the suspend buffer. ++ * ++ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. ++ */ ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) ++{ ++ return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++} ++ ++static int panthor_fw_load_section_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter, ++ u32 ehdr) ++{ ++ struct panthor_fw_binary_section_entry_hdr hdr; ++ struct panthor_fw_section *section; ++ u32 section_size; ++ u32 name_len; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); ++ if (ret) ++ return ret; ++ ++ if (hdr.data.end < hdr.data.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", ++ hdr.data.end, hdr.data.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.va.end < hdr.va.start) { ++ drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", ++ hdr.va.end, hdr.va.start); ++ return -EINVAL; ++ } ++ ++ if (hdr.data.end > fw->size) { ++ drm_err(&ptdev->base, "Firmware corrupted, file truncated? 
data_end=0x%x > fw size=0x%zx\n", ++ hdr.data.end, fw->size); ++ return -EINVAL; ++ } ++ ++ if ((hdr.va.start & ~PAGE_MASK) != 0 || ++ (hdr.va.end & ~PAGE_MASK) != 0) { ++ drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", ++ hdr.va.start, hdr.va.end); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { ++ drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", ++ hdr.flags); ++ return -EINVAL; ++ } ++ ++ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { ++ drm_warn(&ptdev->base, ++ "Firmware protected mode entry not be supported, ignoring"); ++ return 0; ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START && ++ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { ++ drm_err(&ptdev->base, ++ "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); ++ return -EINVAL; ++ } ++ ++ name_len = iter->size - iter->offset; ++ ++ section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); ++ if (!section) ++ return -ENOMEM; ++ ++ list_add_tail(§ion->node, &ptdev->fw->sections); ++ section->flags = hdr.flags; ++ section->data.size = hdr.data.end - hdr.data.start; ++ ++ if (section->data.size > 0) { ++ void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); ++ ++ if (!data) ++ return -ENOMEM; ++ ++ memcpy(data, fw->data + hdr.data.start, section->data.size); ++ section->data.buf = data; ++ } ++ ++ if (name_len > 0) { ++ char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); ++ ++ if (!name) ++ return -ENOMEM; ++ ++ memcpy(name, iter->data + iter->offset, name_len); ++ name[name_len] = '\0'; ++ section->name = name; ++ } ++ ++ section_size = hdr.va.end - hdr.va.start; ++ if (section_size) { ++ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; ++ struct panthor_gem_object *bo; ++ u32 vm_map_flags = 0; ++ struct sg_table *sgt; ++ u64 va = hdr.va.start; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; ++ ++ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; ++ ++ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to ++ * non-cacheable for now. We might want to introduce a new ++ * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device ++ * memory and is currently not used by our driver) for ++ * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit ++ * of IO-coherent systems. 
++ */ ++ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) ++ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; ++ ++ section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), ++ section_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ vm_map_flags, va); ++ if (IS_ERR(section->mem)) ++ return PTR_ERR(section->mem); ++ ++ if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) ++ return -EINVAL; ++ ++ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { ++ ret = panthor_kernel_bo_vmap(section->mem); ++ if (ret) ++ return ret; ++ } ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ ++ bo = to_panthor_bo(section->mem->obj); ++ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); ++ if (IS_ERR(sgt)) ++ return PTR_ERR(sgt); ++ ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++ ++ if (hdr.va.start == CSF_MCU_SHARED_REGION_START) ++ ptdev->fw->shared_section = section; ++ ++ return 0; ++} ++ ++static void ++panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) ++{ ++ struct panthor_fw_section *section; ++ ++ list_for_each_entry(section, &ptdev->fw->sections, node) { ++ struct sg_table *sgt; ++ ++ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) ++ continue; ++ ++ panthor_fw_init_section_mem(ptdev, section); ++ sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); ++ if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) ++ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); ++ } ++} ++ ++static int panthor_fw_load_entry(struct panthor_device *ptdev, ++ const struct firmware *fw, ++ struct panthor_fw_binary_iter *iter) ++{ ++ struct panthor_fw_binary_iter eiter; ++ u32 ehdr; ++ int ret; ++ ++ ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); ++ if (ret) ++ return ret; ++ ++ if ((iter->offset % sizeof(u32)) || ++ (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { ++ drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", ++ (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); ++ return -EINVAL; ++ } ++ ++ if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, ++ CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) ++ return -EINVAL; ++ ++ switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { ++ case CSF_FW_BINARY_ENTRY_TYPE_IFACE: ++ return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); ++ ++ /* FIXME: handle those entry types? 
*/ ++ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: ++ case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: ++ case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: ++ case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: ++ return 0; ++ default: ++ break; ++ } ++ ++ if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) ++ return 0; ++ ++ drm_err(&ptdev->base, ++ "Unsupported non-optional entry type %u in firmware\n", ++ CSF_FW_BINARY_ENTRY_TYPE(ehdr)); ++ return -EINVAL; ++} ++ ++static int panthor_fw_load(struct panthor_device *ptdev) ++{ ++ const struct firmware *fw = NULL; ++ struct panthor_fw_binary_iter iter = {}; ++ struct panthor_fw_binary_hdr hdr; ++ char fw_path[128]; ++ int ret; ++ ++ snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", ++ (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), ++ (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), ++ CSF_FW_NAME); ++ ++ ret = request_firmware(&fw, fw_path, ptdev->base.dev); ++ if (ret) { ++ drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", ++ CSF_FW_NAME); ++ return ret; ++ } ++ ++ iter.data = fw->data; ++ iter.size = fw->size; ++ ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); ++ if (ret) ++ goto out; ++ ++ if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Invalid firmware magic\n"); ++ goto out; ++ } ++ ++ if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { ++ ret = -EINVAL; ++ drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", ++ hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); ++ goto out; ++ } ++ ++ if (hdr.size > iter.size) { ++ drm_err(&ptdev->base, "Firmware image is truncated\n"); ++ goto out; ++ } ++ ++ iter.size = hdr.size; ++ ++ while (iter.offset < hdr.size) { ++ ret = panthor_fw_load_entry(ptdev, fw, &iter); ++ if (ret) ++ goto out; ++ } ++ ++ if (!ptdev->fw->shared_section) { ++ drm_err(&ptdev->base, "Shared interface region not found\n"); ++ ret = -EINVAL; ++ goto out; ++ } ++ ++out: ++ release_firmware(fw); ++ return ret; ++} ++ ++/** ++ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address ++ * @ptdev: Device. ++ * @mcu_va: MCU address. ++ * ++ * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
++ */ ++static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) ++{ ++ u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); ++ u64 shared_mem_end = shared_mem_start + ++ panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) ++ return NULL; ++ ++ return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); ++} ++ ++static int panthor_init_cs_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx, unsigned int cs_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); ++ struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + ++ (csg_idx * glb_iface->control->group_stride) + ++ CSF_STREAM_CONTROL_OFFSET + ++ (cs_idx * csg_iface->control->stream_stride); ++ struct panthor_fw_cs_iface *first_cs_iface = ++ panthor_fw_get_cs_iface(ptdev, 0, 0); ++ ++ if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&cs_iface->lock); ++ cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); ++ cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); ++ ++ if (!cs_iface->input || !cs_iface->output) { ++ drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (cs_iface != first_cs_iface) { ++ if (cs_iface->control->features != first_cs_iface->control->features) { ++ drm_err(&ptdev->base, "Expecting identical CS slots"); ++ return -EINVAL; ++ } ++ } else { ++ u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); ++ ++ ptdev->csif_info.cs_reg_count = reg_count; ++ ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; ++ } ++ ++ return 0; ++} ++ ++static bool compare_csg(const struct panthor_fw_csg_control_iface *a, ++ const struct panthor_fw_csg_control_iface *b) ++{ ++ if (a->features != b->features) ++ return false; ++ if (a->suspend_size != b->suspend_size) ++ return false; ++ if (a->protm_suspend_size != b->protm_suspend_size) ++ return false; ++ if (a->stream_num != b->stream_num) ++ return false; ++ return true; ++} ++ ++static int panthor_init_csg_iface(struct panthor_device *ptdev, ++ unsigned int csg_idx) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; ++ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); ++ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); ++ unsigned int i; ++ ++ if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) ++ return -EINVAL; ++ ++ spin_lock_init(&csg_iface->lock); ++ csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; ++ csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); ++ csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); ++ ++ if (csg_iface->control->stream_num < MIN_CS_PER_CSG || ++ csg_iface->control->stream_num > MAX_CS_PER_CSG) ++ return -EINVAL; ++ ++ if (!csg_iface->input || !csg_iface->output) { ++ drm_err(&ptdev->base, "Invalid group control interface 
input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (csg_idx > 0) { ++ struct panthor_fw_csg_iface *first_csg_iface = ++ panthor_fw_get_csg_iface(ptdev, 0); ++ ++ if (!compare_csg(first_csg_iface->control, csg_iface->control)) { ++ drm_err(&ptdev->base, "Expecting identical CSG slots"); ++ return -EINVAL; ++ } ++ } ++ ++ for (i = 0; i < csg_iface->control->stream_num; i++) { ++ int ret = panthor_init_cs_iface(ptdev, csg_idx, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static u32 panthor_get_instr_features(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) ++ return 0; ++ ++ return glb_iface->control->instr_features; ++} ++ ++static int panthor_fw_init_ifaces(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; ++ unsigned int i; ++ ++ if (!ptdev->fw->shared_section->mem->kmap) ++ return -EINVAL; ++ ++ spin_lock_init(&glb_iface->lock); ++ glb_iface->control = ptdev->fw->shared_section->mem->kmap; ++ ++ if (!glb_iface->control->version) { ++ drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); ++ return -EINVAL; ++ } ++ ++ glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); ++ glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); ++ if (!glb_iface->input || !glb_iface->output) { ++ drm_err(&ptdev->base, "Invalid global control interface input/output VA"); ++ return -EINVAL; ++ } ++ ++ if (glb_iface->control->group_num > MAX_CSGS || ++ glb_iface->control->group_num < MIN_CSGS) { ++ drm_err(&ptdev->base, "Invalid number of control groups"); ++ return -EINVAL; ++ } ++ ++ for (i = 0; i < glb_iface->control->group_num; i++) { ++ int ret = panthor_init_csg_iface(ptdev, i); ++ ++ if (ret) ++ return ret; ++ } ++ ++ drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", ++ CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_MINOR(glb_iface->control->version), ++ CSF_IFACE_VERSION_PATCH(glb_iface->control->version), ++ glb_iface->control->features, ++ panthor_get_instr_features(ptdev)); ++ return 0; ++} ++ ++static void panthor_fw_init_global_iface(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* Enable all cores. */ ++ glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; ++ ++ /* Setup timers. */ ++ glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); ++ glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; ++ glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); ++ ++ /* Enable interrupts we care about. */ ++ glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | ++ GLB_PING | ++ GLB_CFG_PROGRESS_TIMER | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_IDLE_EN | ++ GLB_IDLE; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); ++ panthor_fw_toggle_reqs(glb_iface, req, ack, ++ GLB_CFG_ALLOC_EN | ++ GLB_CFG_POWEROFF_TIMER | ++ GLB_CFG_PROGRESS_TIMER); ++ ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ /* Kick the watchdog. 
*/ ++ mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++} ++ ++static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) ++{ ++ if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) ++ ptdev->fw->booted = true; ++ ++ wake_up_all(&ptdev->fw->req_waitqueue); ++ ++ /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ ++ if (!ptdev->fw->booted) ++ return; ++ ++ panthor_sched_report_fw_events(ptdev, status); ++} ++PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); ++ ++static int panthor_fw_start(struct panthor_device *ptdev) ++{ ++ bool timedout = false; ++ ++ ptdev->fw->booted = false; ++ panthor_job_irq_resume(&ptdev->fw->irq, ~0); ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); ++ ++ if (!wait_event_timeout(ptdev->fw->req_waitqueue, ++ ptdev->fw->booted, ++ msecs_to_jiffies(1000))) { ++ if (!ptdev->fw->booted && ++ !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) ++ timedout = true; ++ } ++ ++ if (timedout) { ++ static const char * const status_str[] = { ++ [MCU_STATUS_DISABLED] = "disabled", ++ [MCU_STATUS_ENABLED] = "enabled", ++ [MCU_STATUS_HALT] = "halt", ++ [MCU_STATUS_FATAL] = "fatal", ++ }; ++ u32 status = gpu_read(ptdev, MCU_STATUS); ++ ++ drm_err(&ptdev->base, "Failed to boot MCU (status=%s)", ++ status < ARRAY_SIZE(status_str) ? status_str[status] : "unknown"); ++ return -ETIMEDOUT; ++ } ++ ++ return 0; ++} ++ ++static void panthor_fw_stop(struct panthor_device *ptdev) ++{ ++ u32 status; ++ ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); ++ if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_DISABLED, 10, 100000)) ++ drm_err(&ptdev->base, "Failed to stop MCU"); ++} ++ ++/** ++ * panthor_fw_pre_reset() - Call before a reset. ++ * @ptdev: Device. ++ * @on_hang: true if the reset was triggered on a GPU hang. ++ * ++ * If the reset is not triggered on a hang, we try to gracefully halt the ++ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. ++ */ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) ++{ ++ /* Make sure we won't be woken up by a ping. */ ++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); ++ ++ ptdev->fw->fast_reset = false; ++ ++ if (!on_hang) { ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 status; ++ ++ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, ++ status == MCU_STATUS_HALT, 10, 100000) && ++ glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { ++ ptdev->fw->fast_reset = true; ++ } else { ++ drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); ++ } ++ ++ /* The FW detects 0 -> 1 transitions. Make sure we reset ++ * the HALT bit before the FW is rebooted. ++ */ ++ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); ++ } ++ ++ panthor_job_irq_suspend(&ptdev->fw->irq); ++} ++ ++/** ++ * panthor_fw_post_reset() - Call after a reset. ++ * @ptdev: Device. ++ * ++ * Start the FW. If this is not a fast reset, all FW sections are reloaded to ++ * make sure we can recover from a memory corruption. ++ */ ++int panthor_fw_post_reset(struct panthor_device *ptdev) ++{ ++ int ret; ++ ++ /* Make the MCU VM active. */ ++ ret = panthor_vm_active(ptdev->fw->vm); ++ if (ret) ++ return ret; ++ ++ /* If this is a fast reset, try to start the MCU without reloading ++ * the FW sections. 
If it fails, go for a full reset. ++ */ ++ if (ptdev->fw->fast_reset) { ++ ret = panthor_fw_start(ptdev); ++ if (!ret) ++ goto out; ++ ++ /* Force a disable, so we get a fresh boot on the next ++ * panthor_fw_start() call. ++ */ ++ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); ++ drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset"); ++ } ++ ++ /* Reload all sections, including RO ones. We're not supposed ++ * to end up here anyway, let's just assume the overhead of ++ * reloading everything is acceptable. ++ */ ++ panthor_reload_fw_sections(ptdev, true); ++ ++ ret = panthor_fw_start(ptdev); ++ if (ret) { ++ drm_err(&ptdev->base, "FW slow reset failed"); ++ return ret; ++ } ++ ++out: ++ /* We must re-initialize the global interface even on fast-reset. */ ++ panthor_fw_init_global_iface(ptdev); ++ return 0; ++} ++ ++/** ++ * panthor_fw_unplug() - Called when the device is unplugged. ++ * @ptdev: Device. ++ * ++ * This function must make sure all pending operations are flushed before ++ * will release device resources, thus preventing any interaction with ++ * the HW. ++ * ++ * If there is still FW-related work running after this function returns, ++ * they must use drm_dev_{enter,exit}() and skip any HW access when ++ * drm_dev_enter() returns false. ++ */ ++void panthor_fw_unplug(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_section *section; ++ ++ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); ++ ++ /* Make sure the IRQ handler can be called after that point. */ ++ if (ptdev->fw->irq.irq) ++ panthor_job_irq_suspend(&ptdev->fw->irq); ++ ++ panthor_fw_stop(ptdev); ++ ++ list_for_each_entry(section, &ptdev->fw->sections, node) ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem); ++ ++ /* We intentionally don't call panthor_vm_idle() and let ++ * panthor_mmu_unplug() release the AS we acquired with ++ * panthor_vm_active() so we don't have to track the VM active/idle ++ * state to keep the active_refcnt balanced. ++ */ ++ panthor_vm_put(ptdev->fw->vm); ++ ++ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); ++} ++ ++/** ++ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. ++ * @req_ptr: Pointer to the req register. ++ * @ack_ptr: Pointer to the ack register. ++ * @wq: Wait queue to use for the sleeping wait. ++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, ++ wait_queue_head_t *wq, ++ u32 req_mask, u32 *acked, ++ u32 timeout_ms) ++{ ++ u32 ack, req = READ_ONCE(*req_ptr) & req_mask; ++ int ret; ++ ++ /* Busy wait for a few µsecs before falling back to a sleeping wait. */ ++ *acked = req_mask; ++ ret = read_poll_timeout_atomic(READ_ONCE, ack, ++ (ack & req_mask) == req, ++ 0, 10, 0, ++ *ack_ptr); ++ if (!ret) ++ return 0; ++ ++ if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, ++ msecs_to_jiffies(timeout_ms))) ++ return 0; ++ ++ /* Check one last time, in case we were not woken up for some reason. */ ++ ack = READ_ONCE(*ack_ptr); ++ if ((ack & req_mask) == req) ++ return 0; ++ ++ *acked = ~(req ^ ack) & req_mask; ++ return -ETIMEDOUT; ++} ++ ++/** ++ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. ++ * @ptdev: Device. 
++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, ++ u32 req_mask, u32 *acked, ++ u32 timeout_ms) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ /* GLB_HALT doesn't get acked through the FW interface. */ ++ if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) ++ return -EINVAL; ++ ++ return panthor_fw_wait_acks(&glb_iface->input->req, ++ &glb_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++} ++ ++/** ++ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. ++ * @ptdev: Device. ++ * @csg_slot: CSG slot ID. ++ * @req_mask: Mask of requests to wait for. ++ * @acked: Pointer to field that's updated with the acked requests. ++ * If the function returns 0, *acked == req_mask. ++ * @timeout_ms: Timeout expressed in milliseconds. ++ * ++ * Return: 0 on success, -ETIMEDOUT otherwise. ++ */ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, ++ u32 req_mask, u32 *acked, u32 timeout_ms) ++{ ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); ++ int ret; ++ ++ if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) ++ return -EINVAL; ++ ++ ret = panthor_fw_wait_acks(&csg_iface->input->req, ++ &csg_iface->output->ack, ++ &ptdev->fw->req_waitqueue, ++ req_mask, acked, timeout_ms); ++ ++ /* ++ * Check that all bits in the state field were updated, if any mismatch ++ * then clear all bits in the state field. This allows code to do ++ * (acked & CSG_STATE_MASK) and get the right value. ++ */ ++ ++ if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) ++ *acked &= ~CSG_STATE_MASK; ++ ++ return ret; ++} ++ ++/** ++ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. ++ * @ptdev: Device. ++ * @csg_mask: Bitmask encoding the command stream group doorbells to ring. ++ * ++ * This function is toggling bits in the doorbell_req and ringing the ++ * global doorbell. It doesn't require a user doorbell to be attached to ++ * the group. ++ */ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++} ++ ++static void panthor_fw_ping_work(struct work_struct *work) ++{ ++ struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); ++ struct panthor_device *ptdev = fw->irq.ptdev; ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 acked; ++ int ret; ++ ++ if (panthor_device_reset_is_pending(ptdev)) ++ return; ++ ++ panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); ++ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); ++ ++ ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); ++ } else { ++ mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, ++ msecs_to_jiffies(PING_INTERVAL_MS)); ++ } ++} ++ ++/** ++ * panthor_fw_init() - Initialize FW related data. ++ * @ptdev: Device. 
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++int panthor_fw_init(struct panthor_device *ptdev) ++{ ++ struct panthor_fw *fw; ++ int ret, irq; ++ ++ fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); ++ if (!fw) ++ return -ENOMEM; ++ ++ ptdev->fw = fw; ++ init_waitqueue_head(&fw->req_waitqueue); ++ INIT_LIST_HEAD(&fw->sections); ++ INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); ++ ++ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); ++ if (irq <= 0) ++ return -ENODEV; ++ ++ ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); ++ if (ret) { ++ drm_err(&ptdev->base, "failed to request job irq"); ++ return ret; ++ } ++ ++ ret = panthor_gpu_l2_power_on(ptdev); ++ if (ret) ++ return ret; ++ ++ fw->vm = panthor_vm_create(ptdev, true, ++ 0, SZ_4G, ++ CSF_MCU_SHARED_REGION_START, ++ CSF_MCU_SHARED_REGION_SIZE); ++ if (IS_ERR(fw->vm)) { ++ ret = PTR_ERR(fw->vm); ++ fw->vm = NULL; ++ goto err_unplug_fw; ++ } ++ ++ ret = panthor_fw_load(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_vm_active(fw->vm); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_start(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ ret = panthor_fw_init_ifaces(ptdev); ++ if (ret) ++ goto err_unplug_fw; ++ ++ panthor_fw_init_global_iface(ptdev); ++ return 0; ++ ++err_unplug_fw: ++ panthor_fw_unplug(ptdev); ++ return ret; ++} ++ ++MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); +diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h +new file mode 100644 +index 000000000000..22448abde992 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_fw.h +@@ -0,0 +1,503 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_MCU_H__ ++#define __PANTHOR_MCU_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_kernel_bo; ++ ++#define MAX_CSGS 31 ++#define MAX_CS_PER_CSG 32 ++ ++struct panthor_fw_ringbuf_input_iface { ++ u64 insert; ++ u64 extract; ++}; ++ ++struct panthor_fw_ringbuf_output_iface { ++ u64 extract; ++ u32 active; ++}; ++ ++struct panthor_fw_cs_control_iface { ++#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) ++#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) ++#define CS_FEATURES_COMPUTE BIT(16) ++#define CS_FEATURES_FRAGMENT BIT(17) ++#define CS_FEATURES_TILER BIT(18) ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++}; ++ ++struct panthor_fw_cs_input_iface { ++#define CS_STATE_MASK GENMASK(2, 0) ++#define CS_STATE_STOP 0 ++#define CS_STATE_START 1 ++#define CS_EXTRACT_EVENT BIT(4) ++#define CS_IDLE_SYNC_WAIT BIT(8) ++#define CS_IDLE_PROTM_PENDING BIT(9) ++#define CS_IDLE_EMPTY BIT(10) ++#define CS_IDLE_RESOURCE_REQ BIT(11) ++#define CS_TILER_OOM BIT(26) ++#define CS_PROTM_PENDING BIT(27) ++#define CS_FATAL BIT(30) ++#define CS_FAULT BIT(31) ++#define CS_REQ_MASK (CS_STATE_MASK | \ ++ CS_EXTRACT_EVENT | \ ++ CS_IDLE_SYNC_WAIT | \ ++ CS_IDLE_PROTM_PENDING | \ ++ CS_IDLE_EMPTY | \ ++ CS_IDLE_RESOURCE_REQ) ++#define CS_EVT_MASK (CS_TILER_OOM | \ ++ CS_PROTM_PENDING | \ ++ CS_FATAL | \ ++ CS_FAULT) ++ u32 req; ++ ++#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) ++#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) ++ u32 config; ++ u32 reserved1; ++ u32 ack_irq_mask; ++ u64 ringbuf_base; ++ u32 ringbuf_size; ++ u32 reserved2; ++ u64 heap_start; ++ u64 heap_end; ++ u64 ringbuf_input; ++ u64 ringbuf_output; ++ u32 instr_config; ++ u32 instrbuf_size; ++ u64 instrbuf_base; ++ u64 
instrbuf_offset_ptr; ++}; ++ ++struct panthor_fw_cs_output_iface { ++ u32 ack; ++ u32 reserved1[15]; ++ u64 status_cmd_ptr; ++ ++#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) ++#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) ++#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) ++#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) ++#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) ++#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) ++#define CS_STATUS_WAIT_PROGRESS BIT(28) ++#define CS_STATUS_WAIT_PROTM BIT(29) ++#define CS_STATUS_WAIT_SYNC_64B BIT(30) ++#define CS_STATUS_WAIT_SYNC BIT(31) ++ u32 status_wait; ++ u32 status_req_resource; ++ u64 status_wait_sync_ptr; ++ u32 status_wait_sync_value; ++ u32 status_scoreboards; ++ ++#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 ++#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 ++#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 ++#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 ++#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 ++#define CS_STATUS_BLOCKED_REASON_RES 6 ++#define CS_STATUS_BLOCKED_REASON_FLUSH 7 ++#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) ++ u32 status_blocked_reason; ++ u32 status_wait_sync_value_hi; ++ u32 reserved2[6]; ++ ++#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) ++#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) ++ u32 fault; ++ u32 fatal; ++ u64 fault_info; ++ u64 fatal_info; ++ u32 reserved3[10]; ++ u32 heap_vt_start; ++ u32 heap_vt_end; ++ u32 reserved4; ++ u32 heap_frag_end; ++ u64 heap_address; ++}; ++ ++struct panthor_fw_csg_control_iface { ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 suspend_size; ++ u32 protm_suspend_size; ++ u32 stream_num; ++ u32 stream_stride; ++}; ++ ++struct panthor_fw_csg_input_iface { ++#define CSG_STATE_MASK GENMASK(2, 0) ++#define CSG_STATE_TERMINATE 0 ++#define CSG_STATE_START 1 ++#define CSG_STATE_SUSPEND 2 ++#define CSG_STATE_RESUME 3 ++#define CSG_ENDPOINT_CONFIG BIT(4) ++#define CSG_STATUS_UPDATE BIT(5) ++#define CSG_SYNC_UPDATE BIT(28) ++#define CSG_IDLE BIT(29) ++#define CSG_DOORBELL BIT(30) ++#define CSG_PROGRESS_TIMER_EVENT BIT(31) ++#define CSG_REQ_MASK (CSG_STATE_MASK | \ ++ CSG_ENDPOINT_CONFIG | \ ++ CSG_STATUS_UPDATE) ++#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ ++ CSG_IDLE | \ ++ CSG_PROGRESS_TIMER_EVENT) ++ u32 req; ++ u32 ack_irq_mask; ++ ++ u32 doorbell_req; ++ u32 cs_irq_ack; ++ u32 reserved1[4]; ++ u64 allow_compute; ++ u64 allow_fragment; ++ u32 allow_other; ++ ++#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) ++#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) ++#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) ++#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) ++#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) ++#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) ++#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) ++ u32 endpoint_req; ++ u32 reserved2[2]; ++ u64 suspend_buf; ++ u64 protm_suspend_buf; ++ u32 config; ++ u32 iter_trace_config; ++}; ++ ++struct panthor_fw_csg_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 cs_irq_req; ++ u32 status_endpoint_current; ++ u32 status_endpoint_req; ++ ++#define CSG_STATUS_STATE_IS_IDLE BIT(0) ++ u32 status_state; ++ u32 resource_dep; ++}; ++ ++struct panthor_fw_global_control_iface { ++ u32 version; ++ u32 features; ++ u32 input_va; ++ u32 output_va; ++ u32 group_num; ++ u32 group_stride; ++ u32 perfcnt_size; ++ u32 instr_features; ++}; ++ ++struct panthor_fw_global_input_iface { ++#define GLB_HALT BIT(0) ++#define 
GLB_CFG_PROGRESS_TIMER BIT(1) ++#define GLB_CFG_ALLOC_EN BIT(2) ++#define GLB_CFG_POWEROFF_TIMER BIT(3) ++#define GLB_PROTM_ENTER BIT(4) ++#define GLB_PERFCNT_EN BIT(5) ++#define GLB_PERFCNT_SAMPLE BIT(6) ++#define GLB_COUNTER_EN BIT(7) ++#define GLB_PING BIT(8) ++#define GLB_FWCFG_UPDATE BIT(9) ++#define GLB_IDLE_EN BIT(10) ++#define GLB_SLEEP BIT(12) ++#define GLB_INACTIVE_COMPUTE BIT(20) ++#define GLB_INACTIVE_FRAGMENT BIT(21) ++#define GLB_INACTIVE_TILER BIT(22) ++#define GLB_PROTM_EXIT BIT(23) ++#define GLB_PERFCNT_THRESHOLD BIT(24) ++#define GLB_PERFCNT_OVERFLOW BIT(25) ++#define GLB_IDLE BIT(26) ++#define GLB_DBG_CSF BIT(30) ++#define GLB_DBG_HOST BIT(31) ++#define GLB_REQ_MASK GENMASK(10, 0) ++#define GLB_EVT_MASK GENMASK(26, 20) ++ u32 req; ++ u32 ack_irq_mask; ++ u32 doorbell_req; ++ u32 reserved1; ++ u32 progress_timer; ++ ++#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) ++#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) ++ u32 poweroff_timer; ++ u64 core_en_mask; ++ u32 reserved2; ++ u32 perfcnt_as; ++ u64 perfcnt_base; ++ u32 perfcnt_extract; ++ u32 reserved3[3]; ++ u32 perfcnt_config; ++ u32 perfcnt_csg_select; ++ u32 perfcnt_fw_enable; ++ u32 perfcnt_csg_enable; ++ u32 perfcnt_csf_enable; ++ u32 perfcnt_shader_enable; ++ u32 perfcnt_tiler_enable; ++ u32 perfcnt_mmu_l2_enable; ++ u32 reserved4[8]; ++ u32 idle_timer; ++}; ++ ++enum panthor_fw_halt_status { ++ PANTHOR_FW_HALT_OK = 0, ++ PANTHOR_FW_HALT_ON_PANIC = 0x4e, ++ PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, ++}; ++ ++struct panthor_fw_global_output_iface { ++ u32 ack; ++ u32 reserved1; ++ u32 doorbell_ack; ++ u32 reserved2; ++ u32 halt_status; ++ u32 perfcnt_status; ++ u32 perfcnt_insert; ++}; ++ ++/** ++ * struct panthor_fw_cs_iface - Firmware command stream slot interface ++ */ ++struct panthor_fw_cs_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream slot control interface. ++ * ++ * Used to expose command stream slot properties. ++ * ++ * This interface is read-only. ++ */ ++ struct panthor_fw_cs_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_cs_input_iface *input; ++ ++ /** ++ * @output: Command stream slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_cs_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_csg_iface - Firmware command stream group slot interface ++ */ ++struct panthor_fw_csg_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. 
++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose command stream group slot properties. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_control_iface *control; ++ ++ /** ++ * @input: Command stream slot input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_csg_input_iface *input; ++ ++ /** ++ * @output: Command stream group slot output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_csg_output_iface *output; ++}; ++ ++/** ++ * struct panthor_fw_global_iface - Firmware global interface ++ */ ++struct panthor_fw_global_iface { ++ /** ++ * @lock: Lock protecting access to the panthor_fw_global_input_iface::req ++ * field. ++ * ++ * Needed so we can update the req field concurrently from the interrupt ++ * handler and the scheduler/FW management logic. ++ * ++ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW ++ * interface sections are mapped uncached/write-combined right now, and ++ * using cmpxchg() on such mappings leads to SError faults. Revisit when ++ * we have 'SHARED' GPU mappings hooked up. ++ */ ++ spinlock_t lock; ++ ++ /** ++ * @control: Command stream group slot control interface. ++ * ++ * Used to expose global FW properties. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_control_iface *control; ++ ++ /** ++ * @input: Global input interface. ++ * ++ * Used for host updates/events. ++ */ ++ struct panthor_fw_global_input_iface *input; ++ ++ /** ++ * @output: Global output interface. ++ * ++ * Used for FW updates/events. ++ * ++ * This interface is read-only. ++ */ ++ const struct panthor_fw_global_output_iface *output; ++}; ++ ++/** ++ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW ++ * @__iface: The interface to operate on. ++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__out_reg: Name of the register to take as a reference in the output section of the ++ * interface. ++ * @__mask: Mask to apply to the update. ++ * ++ * The Host -> FW event/message passing was designed to be lockless, with each side of ++ * the channel having its writeable section. Events are signaled as a difference between ++ * the host and FW side in the req/ack registers (when a bit differs, there's an event ++ * pending, when they are the same, nothing needs attention). ++ * ++ * This helper allows one to update the req register based on the current value of the ++ * ack register managed by the FW. Toggling a specific bit will flag an event. In order ++ * for events to be re-evaluated, the interface doorbell needs to be rung. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val, __out_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __out_val = READ_ONCE((__iface)->output->__out_reg); \ ++ __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++/** ++ * panthor_fw_update_reqs() - Update bits to reflect a configuration change ++ * @__iface: The interface to operate on. 
++ * @__in_reg: Name of the register to update in the input section of the interface. ++ * @__val: Value to set. ++ * @__mask: Mask to apply to the update. ++ * ++ * Some configuration get passed through req registers that are also used to ++ * send events to the FW. Those req registers being updated from the interrupt ++ * handler, they require special helpers to update the configuration part as well. ++ * ++ * Concurrent accesses to the same req register is covered. ++ * ++ * Anything requiring atomic updates to multiple registers requires a dedicated lock. ++ */ ++#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ ++ do { \ ++ u32 __cur_val, __new_val; \ ++ spin_lock(&(__iface)->lock); \ ++ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ ++ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ ++ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ ++ spin_unlock(&(__iface)->lock); \ ++ } while (0) ++ ++struct panthor_fw_global_iface * ++panthor_fw_get_glb_iface(struct panthor_device *ptdev); ++ ++struct panthor_fw_csg_iface * ++panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_fw_cs_iface * ++panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); ++ ++int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, ++ u32 *acked, u32 timeout_ms); ++ ++int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, ++ u32 timeout_ms); ++ ++void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); ++ ++struct panthor_kernel_bo * ++panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, ++ struct panthor_fw_ringbuf_input_iface **input, ++ const struct panthor_fw_ringbuf_output_iface **output, ++ u32 *input_fw_va, u32 *output_fw_va); ++struct panthor_kernel_bo * ++panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); ++ ++struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); ++ ++void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); ++int panthor_fw_post_reset(struct panthor_device *ptdev); ++ ++static inline void panthor_fw_suspend(struct panthor_device *ptdev) ++{ ++ panthor_fw_pre_reset(ptdev, false); ++} ++ ++static inline int panthor_fw_resume(struct panthor_device *ptdev) ++{ ++ return panthor_fw_post_reset(ptdev); ++} ++ ++int panthor_fw_init(struct panthor_device *ptdev); ++void panthor_fw_unplug(struct panthor_device *ptdev); ++ ++#endif +-- +2.42.0 + + +From 36bc9b5534ddcbda518f1066273e7e41f80b18c9 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:23 +0100 +Subject: [PATCH 11/71] [MERGED] drm/panthor: Add the heap logical block + +Tiler heap growing requires some kernel driver involvement: when the +tiler runs out of heap memory, it will raise an exception which is +either directly handled by the firmware if some free heap chunks are +available in the heap context, or passed back to the kernel otherwise. +The heap helpers will be used by the scheduler logic to allocate more +heap chunks to a heap context, when such a situation happens. + +Heap context creation is explicitly requested by userspace (using +the TILER_HEAP_CREATE ioctl), and the returned context is attached to a +queue through some command stream instruction. + +All the kernel does is keep the list of heap chunks allocated to a +context, so they can be freed when TILER_HEAP_DESTROY is called, or +extended when the FW requests a new chunk. 
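+
+As an illustration of that bookkeeping (a sketch, not part of the patch
+itself): each heap chunk starts with a small header whose "next" field
+packs the 4KiB-aligned GPU VA of the chunk it links to in bits [63:12]
+and that chunk's size, in 4KiB units, in bits [11:0]. This matches what
+panthor_alloc_heap_chunk() and panthor_heap_grow() compute below; the
+helper name here is hypothetical:
+
+    static u64 encode_heap_chunk_ptr(u64 chunk_gpu_va, u32 chunk_size)
+    {
+            /* chunk_gpu_va is page-aligned, chunk_size is a power of
+             * two between 256K and 2M, so the size fits in bits [11:0].
+             */
+            return (chunk_gpu_va & GENMASK_ULL(63, 12)) | (chunk_size >> 12);
+    }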
+ +v6: +- Add Maxime's and Heiko's acks + +v5: +- Fix FIXME comment +- Add Steve's R-b + +v4: +- Rework locking to allow concurrent calls to panthor_heap_grow() +- Add a helper to return a heap chunk if we couldn't pass it to the + FW because the group was scheduled out + +v3: +- Add a FIXME for the heap OOM deadlock +- Use the panthor_kernel_bo abstraction for the heap context and heap + chunks +- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver +- Ensure that the heap context is aligned to the GPU cache line size +- Minor code tidy ups + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-10-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_heap.c | 597 +++++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_heap.h | 39 ++ + 2 files changed, 636 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.c + create mode 100644 drivers/gpu/drm/panthor/panthor_heap.h + +diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c +new file mode 100644 +index 000000000000..143fa35f2e74 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.c +@@ -0,0 +1,597 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. */ ++ ++#include ++#include ++ ++#include ++ ++#include "panthor_device.h" ++#include "panthor_gem.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++ ++/* ++ * The GPU heap context is an opaque structure used by the GPU to track the ++ * heap allocations. The driver should only touch it to initialize it (zero all ++ * fields). Because the CPU and GPU can both access this structure it is ++ * required to be GPU cache line aligned. ++ */ ++#define HEAP_CONTEXT_SIZE 32 ++ ++/** ++ * struct panthor_heap_chunk_header - Heap chunk header ++ */ ++struct panthor_heap_chunk_header { ++ /** ++ * @next: Next heap chunk in the list. ++ * ++ * This is a GPU VA. ++ */ ++ u64 next; ++ ++ /** @unknown: MBZ. */ ++ u32 unknown[14]; ++}; ++ ++/** ++ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. ++ */ ++struct panthor_heap_chunk { ++ /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ ++ struct list_head node; ++ ++ /** @bo: Buffer object backing the heap chunk. */ ++ struct panthor_kernel_bo *bo; ++}; ++ ++/** ++ * struct panthor_heap - Structure used to manage tiler heap contexts. ++ */ ++struct panthor_heap { ++ /** @chunks: List containing all heap chunks allocated so far. */ ++ struct list_head chunks; ++ ++ /** @lock: Lock protecting insertion in the chunks list. */ ++ struct mutex lock; ++ ++ /** @chunk_size: Size of each chunk. */ ++ u32 chunk_size; ++ ++ /** @max_chunks: Maximum number of chunks. */ ++ u32 max_chunks; ++ ++ /** ++ * @target_in_flight: Number of in-flight render passes after which ++ * we'd let the FW wait for fragment job to finish instead of allocating new chunks. ++ */ ++ u32 target_in_flight; ++ ++ /** @chunk_count: Number of heap chunks currently allocated. */ ++ u32 chunk_count; ++}; ++ ++#define MAX_HEAPS_PER_POOL 128 ++ ++/** ++ * struct panthor_heap_pool - Pool of heap contexts ++ * ++ * The pool is attached to a panthor_file and can't be shared across processes. ++ */ ++struct panthor_heap_pool { ++ /** @refcount: Reference count. 
*/ ++ struct kref refcount; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @vm: VM this pool is bound to. */ ++ struct panthor_vm *vm; ++ ++ /** @lock: Lock protecting access to @xa. */ ++ struct rw_semaphore lock; ++ ++ /** @xa: Array storing panthor_heap objects. */ ++ struct xarray xa; ++ ++ /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ ++ struct panthor_kernel_bo *gpu_contexts; ++}; ++ ++static int panthor_heap_ctx_stride(struct panthor_device *ptdev) ++{ ++ u32 l2_features = ptdev->gpu_info.l2_features; ++ u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); ++ ++ return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); ++} ++ ++static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) ++{ ++ return panthor_heap_ctx_stride(pool->ptdev) * id; ++} ++ ++static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) ++{ ++ return pool->gpu_contexts->kmap + ++ panthor_get_heap_ctx_offset(pool, id); ++} ++ ++static void panthor_free_heap_chunk(struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ struct panthor_heap_chunk *chunk) ++{ ++ mutex_lock(&heap->lock); ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ mutex_unlock(&heap->lock); ++ ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ kfree(chunk); ++} ++ ++static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ bool initial_chunk) ++{ ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap_chunk_header *hdr; ++ int ret; ++ ++ chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); ++ if (!chunk) ++ return -ENOMEM; ++ ++ chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(chunk->bo)) { ++ ret = PTR_ERR(chunk->bo); ++ goto err_free_chunk; ++ } ++ ++ ret = panthor_kernel_bo_vmap(chunk->bo); ++ if (ret) ++ goto err_destroy_bo; ++ ++ hdr = chunk->bo->kmap; ++ memset(hdr, 0, sizeof(*hdr)); ++ ++ if (initial_chunk && !list_empty(&heap->chunks)) { ++ struct panthor_heap_chunk *prev_chunk; ++ u64 prev_gpuva; ++ ++ prev_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ ++ prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); ++ hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ } ++ ++ panthor_kernel_bo_vunmap(chunk->bo); ++ ++ mutex_lock(&heap->lock); ++ list_add(&chunk->node, &heap->chunks); ++ heap->chunk_count++; ++ mutex_unlock(&heap->lock); ++ ++ return 0; ++ ++err_destroy_bo: ++ panthor_kernel_bo_destroy(vm, chunk->bo); ++ ++err_free_chunk: ++ kfree(chunk); ++ ++ return ret; ++} ++ ++static void panthor_free_heap_chunks(struct panthor_vm *vm, ++ struct panthor_heap *heap) ++{ ++ struct panthor_heap_chunk *chunk, *tmp; ++ ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) ++ panthor_free_heap_chunk(vm, heap, chunk); ++} ++ ++static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, ++ struct panthor_vm *vm, ++ struct panthor_heap *heap, ++ u32 chunk_count) ++{ ++ int ret; ++ u32 i; ++ ++ for (i = 0; i < chunk_count; i++) { ++ ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++static int ++panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) ++{ ++ struct panthor_heap *heap; ++ ++ heap = xa_erase(&pool->xa, handle); ++ if (!heap) ++ return -EINVAL; ++ ++ panthor_free_heap_chunks(pool->vm, heap); ++ 
mutex_destroy(&heap->lock); ++ kfree(heap); ++ return 0; ++} ++ ++/** ++ * panthor_heap_destroy() - Destroy a heap context ++ * @pool: Pool this context belongs to. ++ * @handle: Handle returned by panthor_heap_create(). ++ */ ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) ++{ ++ int ret; ++ ++ down_write(&pool->lock); ++ ret = panthor_heap_destroy_locked(pool, handle); ++ up_write(&pool->lock); ++ ++ return ret; ++} ++ ++/** ++ * panthor_heap_create() - Create a heap context ++ * @pool: Pool to instantiate the heap context from. ++ * @initial_chunk_count: Number of chunk allocated at initialization time. ++ * Must be at least 1. ++ * @chunk_size: The size of each chunk. Must be a power of two between 256k ++ * and 2M. ++ * @max_chunks: Maximum number of chunks that can be allocated. ++ * @target_in_flight: Maximum number of in-flight render passes. ++ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap ++ * context. ++ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk ++ * assigned to the heap context. ++ * ++ * Return: a positive handle on success, a negative error otherwise. ++ */ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va) ++{ ++ struct panthor_heap *heap; ++ struct panthor_heap_chunk *first_chunk; ++ struct panthor_vm *vm; ++ int ret = 0; ++ u32 id; ++ ++ if (initial_chunk_count == 0) ++ return -EINVAL; ++ ++ if (hweight32(chunk_size) != 1 || ++ chunk_size < SZ_256K || chunk_size > SZ_2M) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ vm = panthor_vm_get(pool->vm); ++ up_read(&pool->lock); ++ ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!vm) ++ return -EINVAL; ++ ++ heap = kzalloc(sizeof(*heap), GFP_KERNEL); ++ if (!heap) { ++ ret = -ENOMEM; ++ goto err_put_vm; ++ } ++ ++ mutex_init(&heap->lock); ++ INIT_LIST_HEAD(&heap->chunks); ++ heap->chunk_size = chunk_size; ++ heap->max_chunks = max_chunks; ++ heap->target_in_flight = target_in_flight; ++ ++ ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, ++ initial_chunk_count); ++ if (ret) ++ goto err_free_heap; ++ ++ first_chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); ++ ++ down_write(&pool->lock); ++ /* The pool has been destroyed, we can't create a new heap. */ ++ if (!pool->vm) { ++ ret = -EINVAL; ++ } else { ++ ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); ++ if (!ret) { ++ void *gpu_ctx = panthor_get_heap_ctx(pool, id); ++ ++ memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); ++ *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + ++ panthor_get_heap_ctx_offset(pool, id); ++ } ++ } ++ up_write(&pool->lock); ++ ++ if (ret) ++ goto err_free_heap; ++ ++ panthor_vm_put(vm); ++ return id; ++ ++err_free_heap: ++ panthor_free_heap_chunks(pool->vm, heap); ++ mutex_destroy(&heap->lock); ++ kfree(heap); ++ ++err_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++/** ++ * panthor_heap_return_chunk() - Return an unused heap chunk ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @chunk_gpu_va: The chunk VA to return. 
++ * ++ * This function is used when a chunk allocated with panthor_heap_grow() ++ * couldn't be linked to the heap context through the FW interface because ++ * the group requesting the allocation was scheduled out in the meantime. ++ */ ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ chunk_gpu_va &= GENMASK_ULL(63, 12); ++ ++ mutex_lock(&heap->lock); ++ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { ++ if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { ++ removed = chunk; ++ list_del(&chunk->node); ++ heap->chunk_count--; ++ break; ++ } ++ } ++ mutex_unlock(&heap->lock); ++ ++ if (removed) { ++ panthor_kernel_bo_destroy(pool->vm, chunk->bo); ++ kfree(chunk); ++ ret = 0; ++ } else { ++ ret = -EINVAL; ++ } ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++/** ++ * panthor_heap_grow() - Make a heap context grow. ++ * @pool: The pool this heap belongs to. ++ * @heap_gpu_va: The GPU address of the heap context. ++ * @renderpasses_in_flight: Number of render passes currently in-flight. ++ * @pending_frag_count: Number of fragment jobs waiting for execution/completion. ++ * @new_chunk_gpu_va: Pointer used to return the chunk VA. ++ */ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va) ++{ ++ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); ++ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); ++ struct panthor_heap_chunk *chunk; ++ struct panthor_heap *heap; ++ int ret; ++ ++ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) ++ return -EINVAL; ++ ++ down_read(&pool->lock); ++ heap = xa_load(&pool->xa, heap_id); ++ if (!heap) { ++ ret = -EINVAL; ++ goto out_unlock; ++ } ++ ++ /* If we reached the target in-flight render passes, or if we ++ * reached the maximum number of chunks, let the FW figure another way to ++ * find some memory (wait for render passes to finish, or call the exception ++ * handler provided by the userspace driver, if any). ++ */ ++ if (renderpasses_in_flight > heap->target_in_flight || ++ (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } else if (heap->chunk_count >= heap->max_chunks) { ++ ret = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, ++ * which goes through the blocking allocation path. Ultimately, we ++ * want a non-blocking allocation, so we can immediately report to the ++ * FW when the system is running out of memory. In that case, the FW ++ * can call a user-provided exception handler, which might try to free ++ * some tiler memory by issuing an intermediate fragment job. If the ++ * exception handler can't do anything, it will flag the queue as ++ * faulty so the job that triggered this tiler chunk allocation and all ++ * further jobs in this queue fail immediately instead of having to ++ * wait for the job timeout. 
++ */ ++ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); ++ if (ret) ++ goto out_unlock; ++ ++ chunk = list_first_entry(&heap->chunks, ++ struct panthor_heap_chunk, ++ node); ++ *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | ++ (heap->chunk_size >> 12); ++ ret = 0; ++ ++out_unlock: ++ up_read(&pool->lock); ++ return ret; ++} ++ ++static void panthor_heap_pool_release(struct kref *refcount) ++{ ++ struct panthor_heap_pool *pool = ++ container_of(refcount, struct panthor_heap_pool, refcount); ++ ++ xa_destroy(&pool->xa); ++ kfree(pool); ++} ++ ++/** ++ * panthor_heap_pool_put() - Release a heap pool reference ++ * @pool: Pool to release the reference on. Can be NULL. ++ */ ++void panthor_heap_pool_put(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_put(&pool->refcount, panthor_heap_pool_release); ++} ++ ++/** ++ * panthor_heap_pool_get() - Get a heap pool reference ++ * @pool: Pool to get the reference on. Can be NULL. ++ * ++ * Return: @pool. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool) ++{ ++ if (pool) ++ kref_get(&pool->refcount); ++ ++ return pool; ++} ++ ++/** ++ * panthor_heap_pool_create() - Create a heap pool ++ * @ptdev: Device. ++ * @vm: The VM this heap pool will be attached to. ++ * ++ * Heap pools might contain up to 128 heap contexts, and are per-VM. ++ * ++ * Return: A valid pointer on success, a negative error code otherwise. ++ */ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) ++{ ++ size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * ++ panthor_heap_ctx_stride(ptdev), ++ 4096); ++ struct panthor_heap_pool *pool; ++ int ret = 0; ++ ++ pool = kzalloc(sizeof(*pool), GFP_KERNEL); ++ if (!pool) ++ return ERR_PTR(-ENOMEM); ++ ++ /* We want a weak ref here: the heap pool belongs to the VM, so we're ++ * sure that, as long as the heap pool exists, the VM exists too. ++ */ ++ pool->vm = vm; ++ pool->ptdev = ptdev; ++ init_rwsem(&pool->lock); ++ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); ++ kref_init(&pool->refcount); ++ ++ pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(pool->gpu_contexts)) { ++ ret = PTR_ERR(pool->gpu_contexts); ++ goto err_destroy_pool; ++ } ++ ++ ret = panthor_kernel_bo_vmap(pool->gpu_contexts); ++ if (ret) ++ goto err_destroy_pool; ++ ++ return pool; ++ ++err_destroy_pool: ++ panthor_heap_pool_destroy(pool); ++ return ERR_PTR(ret); ++} ++ ++/** ++ * panthor_heap_pool_destroy() - Destroy a heap pool. ++ * @pool: Pool to destroy. ++ * ++ * This function destroys all heap contexts and their resources. Thus ++ * preventing any use of the heap context or the chunk attached to them ++ * after that point. ++ * ++ * If the GPU still has access to some heap contexts, a fault should be ++ * triggered, which should flag the command stream groups using these ++ * context as faulty. ++ * ++ * The heap pool object is only released when all references to this pool ++ * are released. 
++ */ ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) ++{ ++ struct panthor_heap *heap; ++ unsigned long i; ++ ++ if (!pool) ++ return; ++ ++ down_write(&pool->lock); ++ xa_for_each(&pool->xa, i, heap) ++ drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); ++ ++ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) ++ panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); ++ ++ /* Reflects the fact the pool has been destroyed. */ ++ pool->vm = NULL; ++ up_write(&pool->lock); ++ ++ panthor_heap_pool_put(pool); ++} +diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h +new file mode 100644 +index 000000000000..25a5f2bba445 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_heap.h +@@ -0,0 +1,39 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. */ ++ ++#ifndef __PANTHOR_HEAP_H__ ++#define __PANTHOR_HEAP_H__ ++ ++#include ++ ++struct panthor_device; ++struct panthor_heap_pool; ++struct panthor_vm; ++ ++int panthor_heap_create(struct panthor_heap_pool *pool, ++ u32 initial_chunk_count, ++ u32 chunk_size, ++ u32 max_chunks, ++ u32 target_in_flight, ++ u64 *heap_ctx_gpu_va, ++ u64 *first_chunk_gpu_va); ++int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm); ++void panthor_heap_pool_destroy(struct panthor_heap_pool *pool); ++ ++struct panthor_heap_pool * ++panthor_heap_pool_get(struct panthor_heap_pool *pool); ++void panthor_heap_pool_put(struct panthor_heap_pool *pool); ++ ++int panthor_heap_grow(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u32 renderpasses_in_flight, ++ u32 pending_frag_count, ++ u64 *new_chunk_gpu_va); ++int panthor_heap_return_chunk(struct panthor_heap_pool *pool, ++ u64 heap_gpu_va, ++ u64 chunk_gpu_va); ++ ++#endif +-- +2.42.0 + + +From 445101be35f77937b1591a17d0cb3aed3958e5ff Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:24 +0100 +Subject: [PATCH 12/71] [MERGED] drm/panthor: Add the scheduler logical block + +This is the piece of software interacting with the FW scheduler, and +taking care of some scheduling aspects when the FW comes short of slots +scheduling slots. Indeed, the FW only expose a few slots, and the kernel +has to give all submission contexts, a chance to execute their jobs. + +The kernel-side scheduler is timeslice-based, with a round-robin queue +per priority level. + +Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, +allowing us to delegate the dependency tracking to the core. + +All the gory details should be documented inline. 
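+
+As a rough illustration of the kernel-side policy (a simplified sketch,
+not the driver's actual tick logic): runnable groups are kept in one
+list per priority level, and on each tick the free CSG slots are handed
+out from the highest priority level down, rotating each list so groups
+of equal priority take turns. The helper below is hypothetical; the
+real implementation lives in the tick work added by this patch:
+
+    static void pick_groups(struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT],
+                            struct list_head *picked, u32 free_slots)
+    {
+            for (int prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) {
+                    struct panthor_group *group, *tmp;
+
+                    list_for_each_entry_safe(group, tmp, &runnable[prio], run_node) {
+                            if (!free_slots)
+                                    return;
+
+                            /* Moving the group to the tail of 'picked' also
+                             * rotates it behind its peers for the next tick.
+                             */
+                            list_move_tail(&group->run_node, picked);
+                            free_slots--;
+                    }
+            }
+    }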
+ +v6: +- Add Maxime's and Heiko's acks +- Make sure the scheduler is initialized before queueing the tick work + in the MMU fault handler +- Keep header inclusion alphabetically ordered + +v5: +- Fix typos +- Call panthor_kernel_bo_destroy(group->syncobjs) unconditionally +- Don't move the group to the waiting list tail when it was already + waiting for a different syncobj +- Fix fatal_queues flagging in the tiler OOM path +- Don't warn when more than one job timesout on a group +- Add a warning message when we fail to allocate a heap chunk +- Add Steve's R-b + +v4: +- Check drmm_mutex_init() return code +- s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in + panthor_queue_put_syncwait_obj() +- Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() +- Use atomic_xchg() instead of atomic_fetch_and(0) +- Fix typos +- Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs +- Defer TILER_OOM event handling to a separate workqueue to prevent + deadlocks when the heap chunk allocation is blocked on mem-reclaim. + This is just a temporary solution, until we add support for + non-blocking/failable allocations +- Pass the scheduler workqueue to drm_sched instead of instantiating + a separate one (no longer needed now that heap chunk allocation + happens on a dedicated wq) +- Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle + job timeouts when the system is under mem pressure, and hopefully + free up some memory retained by these jobs + +v3: +- Rework the FW event handling logic to avoid races +- Make sure MMU faults kill the group immediately +- Use the panthor_kernel_bo abstraction for group/queue buffers +- Make in_progress an atomic_t, so we can check it without the reset lock + held +- Don't limit the number of groups per context to the FW scheduler + capacity. Fix the limit to 128 for now. +- Add a panthor_job_vm() helper +- Account for panthor_vm changes +- Add our job fence as DMA_RESV_USAGE_WRITE to all external objects + (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given + we're supposed to be fully-explicit, but other drivers do that, so + there must be a good reason +- Account for drm_sched changes +- Provide a panthor_queue_put_syncwait_obj() +- Unconditionally return groups to their idle list in + panthor_sched_suspend() +- Condition of sched_queue_{,delayed_}work fixed to be only when a reset + isn't pending or in progress. +- Several typos in comments fixed. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-11-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_sched.c | 3502 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/panthor_sched.h | 50 + + 2 files changed, 3552 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.c + create mode 100644 drivers/gpu/drm/panthor/panthor_sched.h + +diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c +new file mode 100644 +index 000000000000..5f7803b6fc48 +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_sched.c +@@ -0,0 +1,3502 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_devfreq.h" ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: Scheduler ++ * ++ * Mali CSF hardware adopts a firmware-assisted scheduling model, where ++ * the firmware takes care of scheduling aspects, to some extent. ++ * ++ * The scheduling happens at the scheduling group level, each group ++ * contains 1 to N queues (N is FW/hardware dependent, and exposed ++ * through the firmware interface). Each queue is assigned a command ++ * stream ring buffer, which serves as a way to get jobs submitted to ++ * the GPU, among other things. ++ * ++ * The firmware can schedule a maximum of M groups (M is FW/hardware ++ * dependent, and exposed through the firmware interface). Passed ++ * this maximum number of groups, the kernel must take care of ++ * rotating the groups passed to the firmware so every group gets ++ * a chance to have his queues scheduled for execution. ++ * ++ * The current implementation only supports with kernel-mode queues. ++ * In other terms, userspace doesn't have access to the ring-buffer. ++ * Instead, userspace passes indirect command stream buffers that are ++ * called from the queue ring-buffer by the kernel using a pre-defined ++ * sequence of command stream instructions to ensure the userspace driver ++ * always gets consistent results (cache maintenance, ++ * synchronization, ...). ++ * ++ * We rely on the drm_gpu_scheduler framework to deal with job ++ * dependencies and submission. As any other driver dealing with a ++ * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each ++ * entity has its own job scheduler. When a job is ready to be executed ++ * (all its dependencies are met), it is pushed to the appropriate ++ * queue ring-buffer, and the group is scheduled for execution if it ++ * wasn't already active. ++ * ++ * Kernel-side group scheduling is timeslice-based. When we have less ++ * groups than there are slots, the periodic tick is disabled and we ++ * just let the FW schedule the active groups. When there are more ++ * groups than slots, we let each group a chance to execute stuff for ++ * a given amount of time, and then re-evaluate and pick new groups ++ * to schedule. The group selection algorithm is based on ++ * priority+round-robin. ++ * ++ * Even though user-mode queues is out of the scope right now, the ++ * current design takes them into account by avoiding any guess on the ++ * group/queue state that would be based on information we wouldn't have ++ * if userspace was in charge of the ring-buffer. That's also one of the ++ * reason we don't do 'cooperative' scheduling (encoding FW group slot ++ * reservation as dma_fence that would be returned from the ++ * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as ++ * a queue of waiters, ordered by job submission order). This approach ++ * would work for kernel-mode queues, but would make user-mode queues a ++ * lot more complicated to retrofit. 
++ */ ++ ++#define JOB_TIMEOUT_MS 5000 ++ ++#define MIN_CS_PER_CSG 8 ++ ++#define MIN_CSGS 3 ++#define MAX_CSG_PRIO 0xf ++ ++struct panthor_group; ++ ++/** ++ * struct panthor_csg_slot - Command stream group slot ++ * ++ * This represents a FW slot for a scheduling group. ++ */ ++struct panthor_csg_slot { ++ /** @group: Scheduling group bound to this slot. */ ++ struct panthor_group *group; ++ ++ /** @priority: Group priority. */ ++ u8 priority; ++ ++ /** ++ * @idle: True if the group bound to this slot is idle. ++ * ++ * A group is idle when it has nothing waiting for execution on ++ * all its queues, or when queues are blocked waiting for something ++ * to happen (synchronization object). ++ */ ++ bool idle; ++}; ++ ++/** ++ * enum panthor_csg_priority - Group priority ++ */ ++enum panthor_csg_priority { ++ /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ ++ PANTHOR_CSG_PRIORITY_LOW = 0, ++ ++ /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ ++ PANTHOR_CSG_PRIORITY_MEDIUM, ++ ++ /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ ++ PANTHOR_CSG_PRIORITY_HIGH, ++ ++ /** ++ * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. ++ * ++ * Real-time priority allows one to preempt scheduling of other ++ * non-real-time groups. When such a group becomes executable, ++ * it will evict the group with the lowest non-rt priority if ++ * there's no free group slot available. ++ * ++ * Currently not exposed to userspace. ++ */ ++ PANTHOR_CSG_PRIORITY_RT, ++ ++ /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ ++ PANTHOR_CSG_PRIORITY_COUNT, ++}; ++ ++/** ++ * struct panthor_scheduler - Object used to manage the scheduler ++ */ ++struct panthor_scheduler { ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** ++ * @wq: Workqueue used by our internal scheduler logic and ++ * drm_gpu_scheduler. ++ * ++ * Used for the scheduler tick, group update or other kind of FW ++ * event processing that can't be handled in the threaded interrupt ++ * path. Also passed to the drm_gpu_scheduler instances embedded ++ * in panthor_queue. ++ */ ++ struct workqueue_struct *wq; ++ ++ /** ++ * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. ++ * ++ * We have a queue dedicated to heap chunk allocation works to avoid ++ * blocking the rest of the scheduler if the allocation tries to ++ * reclaim memory. ++ */ ++ struct workqueue_struct *heap_alloc_wq; ++ ++ /** @tick_work: Work executed on a scheduling tick. */ ++ struct delayed_work tick_work; ++ ++ /** ++ * @sync_upd_work: Work used to process synchronization object updates. ++ * ++ * We use this work to unblock queues/groups that were waiting on a ++ * synchronization object. ++ */ ++ struct work_struct sync_upd_work; ++ ++ /** ++ * @fw_events_work: Work used to process FW events outside the interrupt path. ++ * ++ * Even if the interrupt is threaded, we need any event processing ++ * that require taking the panthor_scheduler::lock to be processed ++ * outside the interrupt path so we don't block the tick logic when ++ * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the ++ * event processing requires taking this lock, we just delegate all ++ * FW event processing to the scheduler workqueue. ++ */ ++ struct work_struct fw_events_work; ++ ++ /** ++ * @fw_events: Bitmask encoding pending FW events. ++ */ ++ atomic_t fw_events; ++ ++ /** ++ * @resched_target: When the next tick should occur. ++ * ++ * Expressed in jiffies. ++ */ ++ u64 resched_target; ++ ++ /** ++ * @last_tick: When the last tick occurred. 
++ * ++ * Expressed in jiffies. ++ */ ++ u64 last_tick; ++ ++ /** @tick_period: Tick period in jiffies. */ ++ u64 tick_period; ++ ++ /** ++ * @lock: Lock protecting access to all the scheduler fields. ++ * ++ * Should be taken in the tick work, the irq handler, and anywhere the @groups ++ * fields are touched. ++ */ ++ struct mutex lock; ++ ++ /** @groups: Various lists used to classify groups. */ ++ struct { ++ /** ++ * @runnable: Runnable group lists. ++ * ++ * When a group has queues that want to execute something, ++ * its panthor_group::run_node should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @idle: Idle group lists. ++ * ++ * When all queues of a group are idle (either because they ++ * have nothing to execute, or because they are blocked), the ++ * panthor_group::run_node field should be inserted here. ++ * ++ * One list per-priority. ++ */ ++ struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; ++ ++ /** ++ * @waiting: List of groups whose queues are blocked on a ++ * synchronization object. ++ * ++ * Insert panthor_group::wait_node here when a group is waiting ++ * for synchronization objects to be signaled. ++ * ++ * This list is evaluated in the @sync_upd_work work. ++ */ ++ struct list_head waiting; ++ } groups; ++ ++ /** ++ * @csg_slots: FW command stream group slots. ++ */ ++ struct panthor_csg_slot csg_slots[MAX_CSGS]; ++ ++ /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ ++ u32 csg_slot_count; ++ ++ /** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */ ++ u32 cs_slot_count; ++ ++ /** @as_slot_count: Number of address space slots supported by the MMU. */ ++ u32 as_slot_count; ++ ++ /** @used_csg_slot_count: Number of command stream group slot currently used. */ ++ u32 used_csg_slot_count; ++ ++ /** @sb_slot_count: Number of scoreboard slots. */ ++ u32 sb_slot_count; ++ ++ /** ++ * @might_have_idle_groups: True if an active group might have become idle. ++ * ++ * This will force a tick, so other runnable groups can be scheduled if one ++ * or more active groups became idle. ++ */ ++ bool might_have_idle_groups; ++ ++ /** @pm: Power management related fields. */ ++ struct { ++ /** @has_ref: True if the scheduler owns a runtime PM reference. */ ++ bool has_ref; ++ } pm; ++ ++ /** @reset: Reset related fields. */ ++ struct { ++ /** @lock: Lock protecting the other reset fields. */ ++ struct mutex lock; ++ ++ /** ++ * @in_progress: True if a reset is in progress. ++ * ++ * Set to true in panthor_sched_pre_reset() and back to false in ++ * panthor_sched_post_reset(). ++ */ ++ atomic_t in_progress; ++ ++ /** ++ * @stopped_groups: List containing all groups that were stopped ++ * before a reset. ++ * ++ * Insert panthor_group::run_node in the pre_reset path. ++ */ ++ struct list_head stopped_groups; ++ } reset; ++}; ++ ++/** ++ * struct panthor_syncobj_32b - 32-bit FW synchronization object ++ */ ++struct panthor_syncobj_32b { ++ /** @seqno: Sequence number. */ ++ u32 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++}; ++ ++/** ++ * struct panthor_syncobj_64b - 64-bit FW synchronization object ++ */ ++struct panthor_syncobj_64b { ++ /** @seqno: Sequence number. */ ++ u64 seqno; ++ ++ /** ++ * @status: Status. ++ * ++ * Not zero on failure. ++ */ ++ u32 status; ++ ++ /** @pad: MBZ. 
*/ ++ u32 pad; ++}; ++ ++/** ++ * struct panthor_queue - Execution queue ++ */ ++struct panthor_queue { ++ /** @scheduler: DRM scheduler used for this queue. */ ++ struct drm_gpu_scheduler scheduler; ++ ++ /** @entity: DRM scheduling entity used for this queue. */ ++ struct drm_sched_entity entity; ++ ++ /** ++ * @remaining_time: Time remaining before the job timeout expires. ++ * ++ * The job timeout is suspended when the queue is not scheduled by the ++ * FW. Every time we suspend the timer, we need to save the remaining ++ * time so we can restore it later on. ++ */ ++ unsigned long remaining_time; ++ ++ /** @timeout_suspended: True if the job timeout was suspended. */ ++ bool timeout_suspended; ++ ++ /** ++ * @doorbell_id: Doorbell assigned to this queue. ++ * ++ * Right now, all groups share the same doorbell, and the doorbell ID ++ * is assigned to group_slot + 1 when the group is assigned a slot. But ++ * we might decide to provide fine grained doorbell assignment at some ++ * point, so don't have to wake up all queues in a group every time one ++ * of them is updated. ++ */ ++ u8 doorbell_id; ++ ++ /** ++ * @priority: Priority of the queue inside the group. ++ * ++ * Must be less than 16 (Only 4 bits available). ++ */ ++ u8 priority; ++#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) ++ ++ /** @ringbuf: Command stream ring-buffer. */ ++ struct panthor_kernel_bo *ringbuf; ++ ++ /** @iface: Firmware interface. */ ++ struct { ++ /** @mem: FW memory allocated for this interface. */ ++ struct panthor_kernel_bo *mem; ++ ++ /** @input: Input interface. */ ++ struct panthor_fw_ringbuf_input_iface *input; ++ ++ /** @output: Output interface. */ ++ const struct panthor_fw_ringbuf_output_iface *output; ++ ++ /** @input_fw_va: FW virtual address of the input interface buffer. */ ++ u32 input_fw_va; ++ ++ /** @output_fw_va: FW virtual address of the output interface buffer. */ ++ u32 output_fw_va; ++ } iface; ++ ++ /** ++ * @syncwait: Stores information about the synchronization object this ++ * queue is waiting on. ++ */ ++ struct { ++ /** @gpu_va: GPU address of the synchronization object. */ ++ u64 gpu_va; ++ ++ /** @ref: Reference value to compare against. */ ++ u64 ref; ++ ++ /** @gt: True if this is a greater-than test. */ ++ bool gt; ++ ++ /** @sync64: True if this is a 64-bit sync object. */ ++ bool sync64; ++ ++ /** @bo: Buffer object holding the synchronization object. */ ++ struct drm_gem_object *obj; ++ ++ /** @offset: Offset of the synchronization object inside @bo. */ ++ u64 offset; ++ ++ /** ++ * @kmap: Kernel mapping of the buffer object holding the ++ * synchronization object. ++ */ ++ void *kmap; ++ } syncwait; ++ ++ /** @fence_ctx: Fence context fields. */ ++ struct { ++ /** @lock: Used to protect access to all fences allocated by this context. */ ++ spinlock_t lock; ++ ++ /** ++ * @id: Fence context ID. ++ * ++ * Allocated with dma_fence_context_alloc(). ++ */ ++ u64 id; ++ ++ /** @seqno: Sequence number of the last initialized fence. */ ++ atomic64_t seqno; ++ ++ /** ++ * @in_flight_jobs: List containing all in-flight jobs. ++ * ++ * Used to keep track and signal panthor_job::done_fence when the ++ * synchronization object attached to the queue is signaled. ++ */ ++ struct list_head in_flight_jobs; ++ } fence_ctx; ++}; ++ ++/** ++ * enum panthor_group_state - Scheduling group state. ++ */ ++enum panthor_group_state { ++ /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. 
*/ ++ PANTHOR_CS_GROUP_CREATED, ++ ++ /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */ ++ PANTHOR_CS_GROUP_ACTIVE, ++ ++ /** ++ * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is ++ * inactive/suspended right now. ++ */ ++ PANTHOR_CS_GROUP_SUSPENDED, ++ ++ /** ++ * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated. ++ * ++ * Can no longer be scheduled. The only allowed action is a destruction. ++ */ ++ PANTHOR_CS_GROUP_TERMINATED, ++}; ++ ++/** ++ * struct panthor_group - Scheduling group object ++ */ ++struct panthor_group { ++ /** @refcount: Reference count */ ++ struct kref refcount; ++ ++ /** @ptdev: Device. */ ++ struct panthor_device *ptdev; ++ ++ /** @vm: VM bound to the group. */ ++ struct panthor_vm *vm; ++ ++ /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */ ++ u64 compute_core_mask; ++ ++ /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */ ++ u64 fragment_core_mask; ++ ++ /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */ ++ u64 tiler_core_mask; ++ ++ /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ ++ u8 max_compute_cores; ++ ++ /** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */ ++ u8 max_fragment_cores; ++ ++ /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ ++ u8 max_tiler_cores; ++ ++ /** @priority: Group priority (check panthor_csg_priority). */ ++ u8 priority; ++ ++ /** @blocked_queues: Bitmask reflecting the blocked queues. */ ++ u32 blocked_queues; ++ ++ /** @idle_queues: Bitmask reflecting the idle queues. */ ++ u32 idle_queues; ++ ++ /** @fatal_lock: Lock used to protect access to fatal fields. */ ++ spinlock_t fatal_lock; ++ ++ /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */ ++ u32 fatal_queues; ++ ++ /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */ ++ atomic_t tiler_oom; ++ ++ /** @queue_count: Number of queues in this group. */ ++ u32 queue_count; ++ ++ /** @queues: Queues owned by this group. */ ++ struct panthor_queue *queues[MAX_CS_PER_CSG]; ++ ++ /** ++ * @csg_id: ID of the FW group slot. ++ * ++ * -1 when the group is not scheduled/active. ++ */ ++ int csg_id; ++ ++ /** ++ * @destroyed: True when the group has been destroyed. ++ * ++ * If a group is destroyed it becomes useless: no further jobs can be submitted ++ * to its queues. We simply wait for all references to be dropped so we can ++ * release the group object. ++ */ ++ bool destroyed; ++ ++ /** ++ * @timedout: True when a timeout occurred on any of the queues owned by ++ * this group. ++ * ++ * Timeouts can be reported by drm_sched or by the FW. In any case, any ++ * timeout situation is unrecoverable, and the group becomes useless. ++ * We simply wait for all references to be dropped so we can release the ++ * group object. ++ */ ++ bool timedout; ++ ++ /** ++ * @syncobjs: Pool of per-queue synchronization objects. ++ * ++ * One sync object per queue. The position of the sync object is ++ * determined by the queue index. ++ */ ++ struct panthor_kernel_bo *syncobjs; ++ ++ /** @state: Group state. */ ++ enum panthor_group_state state; ++ ++ /** ++ * @suspend_buf: Suspend buffer. ++ * ++ * Stores the state of the group and its queues when a group is suspended. ++ * Used at resume time to restore the group in its previous state. ++ * ++ * The size of the suspend buffer is exposed through the FW interface. 
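++ *
++ * The GPU address of this buffer is passed to the FW through
++ * csg_iface->input->suspend_buf when the group is programmed on a CSG
++ * slot (see csg_slot_prog_locked()).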
++ */ ++ struct panthor_kernel_bo *suspend_buf; ++ ++ /** ++ * @protm_suspend_buf: Protection mode suspend buffer. ++ * ++ * Stores the state of the group and its queues when a group that's in ++ * protection mode is suspended. ++ * ++ * Used at resume time to restore the group in its previous state. ++ * ++ * The size of the protection mode suspend buffer is exposed through the ++ * FW interface. ++ */ ++ struct panthor_kernel_bo *protm_suspend_buf; ++ ++ /** @sync_upd_work: Work used to check/signal job fences. */ ++ struct work_struct sync_upd_work; ++ ++ /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ ++ struct work_struct tiler_oom_work; ++ ++ /** @term_work: Work used to finish the group termination procedure. */ ++ struct work_struct term_work; ++ ++ /** ++ * @release_work: Work used to release group resources. ++ * ++ * We need to postpone the group release to avoid a deadlock when ++ * the last ref is released in the tick work. ++ */ ++ struct work_struct release_work; ++ ++ /** ++ * @run_node: Node used to insert the group in the ++ * panthor_group::groups::{runnable,idle} and ++ * panthor_group::reset.stopped_groups lists. ++ */ ++ struct list_head run_node; ++ ++ /** ++ * @wait_node: Node used to insert the group in the ++ * panthor_group::groups::waiting list. ++ */ ++ struct list_head wait_node; ++}; ++ ++/** ++ * group_queue_work() - Queue a group work ++ * @group: Group to queue the work for. ++ * @wname: Work name. ++ * ++ * Grabs a ref and queue a work item to the scheduler workqueue. If ++ * the work was already queued, we release the reference we grabbed. ++ * ++ * Work callbacks must release the reference we grabbed here. ++ */ ++#define group_queue_work(group, wname) \ ++ do { \ ++ group_get(group); \ ++ if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ ++ group_put(group); \ ++ } while (0) ++ ++/** ++ * sched_queue_work() - Queue a scheduler work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * ++ * Conditionally queues a scheduler work if no reset is pending/in-progress. ++ */ ++#define sched_queue_work(sched, wname) \ ++ do { \ ++ if (!atomic_read(&(sched)->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ queue_work((sched)->wq, &(sched)->wname ## _work); \ ++ } while (0) ++ ++/** ++ * sched_queue_delayed_work() - Queue a scheduler delayed work. ++ * @sched: Scheduler object. ++ * @wname: Work name. ++ * @delay: Work delay in jiffies. ++ * ++ * Conditionally queues a scheduler delayed work if no reset is ++ * pending/in-progress. ++ */ ++#define sched_queue_delayed_work(sched, wname, delay) \ ++ do { \ ++ if (!atomic_read(&sched->reset.in_progress) && \ ++ !panthor_device_reset_is_pending((sched)->ptdev)) \ ++ mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ ++ } while (0) ++ ++/* ++ * We currently set the maximum of groups per file to an arbitrary low value. ++ * But this can be updated if we need more. ++ */ ++#define MAX_GROUPS_PER_POOL 128 ++ ++/** ++ * struct panthor_group_pool - Group pool ++ * ++ * Each file get assigned a group pool. ++ */ ++struct panthor_group_pool { ++ /** @xa: Xarray used to manage group handles. */ ++ struct xarray xa; ++}; ++ ++/** ++ * struct panthor_job - Used to manage GPU job ++ */ ++struct panthor_job { ++ /** @base: Inherit from drm_sched_job. */ ++ struct drm_sched_job base; ++ ++ /** @refcount: Reference count. */ ++ struct kref refcount; ++ ++ /** @group: Group of the queue this job will be pushed to. 
*/ ++ struct panthor_group *group; ++ ++ /** @queue_idx: Index of the queue inside @group. */ ++ u32 queue_idx; ++ ++ /** @call_info: Information about the userspace command stream call. */ ++ struct { ++ /** @start: GPU address of the userspace command stream. */ ++ u64 start; ++ ++ /** @size: Size of the userspace command stream. */ ++ u32 size; ++ ++ /** ++ * @latest_flush: Flush ID at the time the userspace command ++ * stream was built. ++ * ++ * Needed for the flush reduction mechanism. ++ */ ++ u32 latest_flush; ++ } call_info; ++ ++ /** @ringbuf: Position of this job is in the ring buffer. */ ++ struct { ++ /** @start: Start offset. */ ++ u64 start; ++ ++ /** @end: End offset. */ ++ u64 end; ++ } ringbuf; ++ ++ /** ++ * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs ++ * list. ++ */ ++ struct list_head node; ++ ++ /** @done_fence: Fence signaled when the job is finished or cancelled. */ ++ struct dma_fence *done_fence; ++}; ++ ++static void ++panthor_queue_put_syncwait_obj(struct panthor_queue *queue) ++{ ++ if (queue->syncwait.kmap) { ++ struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); ++ ++ drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); ++ queue->syncwait.kmap = NULL; ++ } ++ ++ drm_gem_object_put(queue->syncwait.obj); ++ queue->syncwait.obj = NULL; ++} ++ ++static void * ++panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_gem_object *bo; ++ struct iosys_map map; ++ int ret; ++ ++ if (queue->syncwait.kmap) ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++ bo = panthor_vm_get_bo_for_va(group->vm, ++ queue->syncwait.gpu_va, ++ &queue->syncwait.offset); ++ if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.obj = &bo->base.base; ++ ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto err_put_syncwait_obj; ++ ++ queue->syncwait.kmap = map.vaddr; ++ if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) ++ goto err_put_syncwait_obj; ++ ++ return queue->syncwait.kmap + queue->syncwait.offset; ++ ++err_put_syncwait_obj: ++ panthor_queue_put_syncwait_obj(queue); ++ return NULL; ++} ++ ++static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) ++{ ++ if (IS_ERR_OR_NULL(queue)) ++ return; ++ ++ if (queue->entity.fence_context) ++ drm_sched_entity_destroy(&queue->entity); ++ ++ if (queue->scheduler.ops) ++ drm_sched_fini(&queue->scheduler); ++ ++ panthor_queue_put_syncwait_obj(queue); ++ ++ panthor_kernel_bo_destroy(group->vm, queue->ringbuf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); ++ ++ kfree(queue); ++} ++ ++static void group_release_work(struct work_struct *work) ++{ ++ struct panthor_group *group = container_of(work, ++ struct panthor_group, ++ release_work); ++ struct panthor_device *ptdev = group->ptdev; ++ u32 i; ++ ++ for (i = 0; i < group->queue_count; i++) ++ group_free_queue(group, group->queues[i]); ++ ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); ++ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); ++ panthor_kernel_bo_destroy(group->vm, group->syncobjs); ++ ++ panthor_vm_put(group->vm); ++ kfree(group); ++} ++ ++static void group_release(struct kref *kref) ++{ ++ struct panthor_group *group = container_of(kref, ++ struct panthor_group, ++ refcount); ++ struct panthor_device *ptdev = 
group->ptdev; ++ ++ drm_WARN_ON(&ptdev->base, group->csg_id >= 0); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); ++ drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); ++ ++ queue_work(panthor_cleanup_wq, &group->release_work); ++} ++ ++static void group_put(struct panthor_group *group) ++{ ++ if (group) ++ kref_put(&group->refcount, group_release); ++} ++ ++static struct panthor_group * ++group_get(struct panthor_group *group) ++{ ++ if (group) ++ kref_get(&group->refcount); ++ ++ return group; ++} ++ ++/** ++ * group_bind_locked() - Bind a group to a group slot ++ * @group: Group. ++ * @csg_id: Slot. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_bind_locked(struct panthor_group *group, u32 csg_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int ret; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || ++ ptdev->scheduler->csg_slots[csg_id].group)) ++ return -EINVAL; ++ ++ ret = panthor_vm_active(group->vm); ++ if (ret) ++ return ret; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group_get(group); ++ group->csg_id = csg_id; ++ ++ /* Dummy doorbell allocation: doorbell is assigned to the group and ++ * all queues use the same doorbell. ++ * ++ * TODO: Implement LRU-based doorbell assignment, so the most often ++ * updated queues get their own doorbell, thus avoiding useless checks ++ * on queues belonging to the same group that are rarely updated. ++ */ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = csg_id + 1; ++ ++ csg_slot->group = group; ++ ++ return 0; ++} ++ ++/** ++ * group_unbind_locked() - Unbind a group from a slot. ++ * @group: Group to unbind. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++group_unbind_locked(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_csg_slot *slot; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) ++ return -EINVAL; ++ ++ slot = &ptdev->scheduler->csg_slots[group->csg_id]; ++ panthor_vm_idle(group->vm); ++ group->csg_id = -1; ++ ++ /* Tiler OOM events will be re-issued next time the group is scheduled. */ ++ atomic_set(&group->tiler_oom, 0); ++ cancel_work(&group->tiler_oom_work); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ group->queues[i]->doorbell_id = -1; ++ ++ slot->group = NULL; ++ ++ group_put(group); ++ return 0; ++} ++ ++/** ++ * cs_slot_prog_locked() - Program a queue slot ++ * @ptdev: Device. ++ * @csg_id: Group slot ID. ++ * @cs_id: Queue slot ID. ++ * ++ * Program a queue slot with the queue information so things can start being ++ * executed on this queue. ++ * ++ * The group slot must have a group bound to it already (group_bind_locked()). 
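++ *
++ * Programming a slot means exposing the ring-buffer GPU address/size and the
++ * input/output interface FW addresses to the FW, setting the queue priority
++ * and doorbell through CS_CONFIG, and resuming the drm_sched timeout if the
++ * ring-buffer still contains unprocessed instructions.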
++ */ ++static void ++cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) ++{ ++ struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id]; ++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ queue->iface.input->extract = queue->iface.output->extract; ++ drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract); ++ ++ cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf); ++ cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); ++ cs_iface->input->ringbuf_input = queue->iface.input_fw_va; ++ cs_iface->input->ringbuf_output = queue->iface.output_fw_va; ++ cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) | ++ CS_CONFIG_DOORBELL(queue->doorbell_id); ++ cs_iface->input->ack_irq_mask = ~0; ++ panthor_fw_update_reqs(cs_iface, req, ++ CS_IDLE_SYNC_WAIT | ++ CS_IDLE_EMPTY | ++ CS_STATE_START | ++ CS_EXTRACT_EVENT, ++ CS_IDLE_SYNC_WAIT | ++ CS_IDLE_EMPTY | ++ CS_STATE_MASK | ++ CS_EXTRACT_EVENT); ++ if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) { ++ drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time); ++ queue->timeout_suspended = false; ++ } ++} ++ ++/** ++ * @cs_slot_reset_locked() - Reset a queue slot ++ * @ptdev: Device. ++ * @csg_id: Group slot. ++ * @cs_id: Queue slot. ++ * ++ * Change the queue slot state to STOP and suspend the queue timeout if ++ * the queue is not blocked. ++ * ++ * The group slot must have a group bound to it (group_bind_locked()). ++ */ ++static int ++cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) ++{ ++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; ++ struct panthor_queue *queue = group->queues[cs_id]; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ panthor_fw_update_reqs(cs_iface, req, ++ CS_STATE_STOP, ++ CS_STATE_MASK); ++ ++ /* If the queue is blocked, we want to keep the timeout running, so ++ * we can detect unbounded waits and kill the group when that happens. ++ */ ++ if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) { ++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); ++ queue->timeout_suspended = true; ++ WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS)); ++ } ++ ++ return 0; ++} ++ ++/** ++ * csg_slot_sync_priority_locked() - Synchronize the group slot priority ++ * @ptdev: Device. ++ * @csg_id: Group slot ID. ++ * ++ * Group slot priority update happens asynchronously. When we receive a ++ * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can ++ * reflect it to our panthor_csg_slot object. ++ */ ++static void ++csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28; ++} ++ ++/** ++ * cs_slot_sync_queue_state_locked() - Synchronize the queue slot priority ++ * @ptdev: Device. ++ * @csg_id: Group slot. ++ * @cs_id: Queue slot. ++ * ++ * Queue state is updated on group suspend or STATUS_UPDATE event. 
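++ *
++ * If the queue is blocked on a SYNC_WAIT, the group is moved to the waiting
++ * list and the synchronization object address, reference value, width
++ * (32/64-bit) and condition (greater-than or not) are recorded, so
++ * sync_upd_work can re-evaluate the wait when sync objects are updated.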
++ */ ++static void ++cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) ++{ ++ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; ++ struct panthor_queue *queue = group->queues[cs_id]; ++ struct panthor_fw_cs_iface *cs_iface = ++ panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id); ++ ++ u32 status_wait_cond; ++ ++ switch (cs_iface->output->status_blocked_reason) { ++ case CS_STATUS_BLOCKED_REASON_UNBLOCKED: ++ if (queue->iface.input->insert == queue->iface.output->extract && ++ cs_iface->output->status_scoreboards == 0) ++ group->idle_queues |= BIT(cs_id); ++ break; ++ ++ case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: ++ if (list_empty(&group->wait_node)) { ++ list_move_tail(&group->wait_node, ++ &group->ptdev->scheduler->groups.waiting); ++ } ++ group->blocked_queues |= BIT(cs_id); ++ queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; ++ queue->syncwait.ref = cs_iface->output->status_wait_sync_value; ++ status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; ++ queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; ++ if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { ++ u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; ++ ++ queue->syncwait.sync64 = true; ++ queue->syncwait.ref |= sync_val_hi << 32; ++ } else { ++ queue->syncwait.sync64 = false; ++ } ++ break; ++ ++ default: ++ /* Other reasons are not blocking. Consider the queue as runnable ++ * in those cases. ++ */ ++ break; ++ } ++} ++ ++static void ++csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ u32 i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ group->idle_queues = 0; ++ group->blocked_queues = 0; ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_sync_queue_state_locked(ptdev, csg_id, i); ++ } ++} ++ ++static void ++csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_group *group; ++ enum panthor_group_state new_state, old_state; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ group = csg_slot->group; ++ ++ if (!group) ++ return; ++ ++ old_state = group->state; ++ switch (csg_iface->output->ack & CSG_STATE_MASK) { ++ case CSG_STATE_START: ++ case CSG_STATE_RESUME: ++ new_state = PANTHOR_CS_GROUP_ACTIVE; ++ break; ++ case CSG_STATE_TERMINATE: ++ new_state = PANTHOR_CS_GROUP_TERMINATED; ++ break; ++ case CSG_STATE_SUSPEND: ++ new_state = PANTHOR_CS_GROUP_SUSPENDED; ++ break; ++ } ++ ++ if (old_state == new_state) ++ return; ++ ++ if (new_state == PANTHOR_CS_GROUP_SUSPENDED) ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ ++ if (old_state == PANTHOR_CS_GROUP_ACTIVE) { ++ u32 i; ++ ++ /* Reset the queue slots so we start from a clean ++ * state when starting/resuming a new group on this ++ * CSG slot. No wait needed here, and no ringbell ++ * either, since the CS slot will only be re-used ++ * on the next CSG start operation. 
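++ *
++ * cs_slot_reset_locked() also suspends the drm_sched timeout of
++ * queues that are not blocked on a synchronization object.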
++ */ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ cs_slot_reset_locked(ptdev, csg_id, i); ++ } ++ } ++ ++ group->state = new_state; ++} ++ ++static int ++csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) ++{ ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_csg_slot *csg_slot; ++ struct panthor_group *group; ++ u32 queue_mask = 0, i; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (priority > MAX_CSG_PRIO) ++ return -EINVAL; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) ++ return -EINVAL; ++ ++ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ group = csg_slot->group; ++ if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) ++ return 0; ++ ++ csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id); ++ ++ for (i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) { ++ cs_slot_prog_locked(ptdev, csg_id, i); ++ queue_mask |= BIT(i); ++ } ++ } ++ ++ csg_iface->input->allow_compute = group->compute_core_mask; ++ csg_iface->input->allow_fragment = group->fragment_core_mask; ++ csg_iface->input->allow_other = group->tiler_core_mask; ++ csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | ++ CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | ++ CSG_EP_REQ_TILER(group->max_tiler_cores) | ++ CSG_EP_REQ_PRIORITY(priority); ++ csg_iface->input->config = panthor_vm_as(group->vm); ++ ++ if (group->suspend_buf) ++ csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf); ++ else ++ csg_iface->input->suspend_buf = 0; ++ ++ if (group->protm_suspend_buf) { ++ csg_iface->input->protm_suspend_buf = ++ panthor_kernel_bo_gpuva(group->protm_suspend_buf); ++ } else { ++ csg_iface->input->protm_suspend_buf = 0; ++ } ++ ++ csg_iface->input->ack_irq_mask = ~0; ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); ++ return 0; ++} ++ ++static void ++cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fatal; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fatal = cs_iface->output->fatal; ++ info = cs_iface->output->fatal_info; ++ ++ if (group) ++ group->fatal_queues |= BIT(cs_id); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" ++ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fatal), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), ++ (unsigned int)CS_EXCEPTION_DATA(fatal), ++ info); ++} ++ ++static void ++cs_slot_process_fault_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_queue *queue = group && cs_id < group->queue_count ? 
++ group->queues[cs_id] : NULL; ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 fault; ++ u64 info; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ fault = cs_iface->output->fault; ++ info = cs_iface->output->fault_info; ++ ++ if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { ++ u64 cs_extract = queue->iface.output->extract; ++ struct panthor_job *job; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { ++ if (cs_extract >= job->ringbuf.end) ++ continue; ++ ++ if (cs_extract < job->ringbuf.start) ++ break; ++ ++ dma_fence_set_error(job->done_fence, -EINVAL); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ ++ drm_warn(&ptdev->base, ++ "CSG slot %d CS slot: %d\n" ++ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" ++ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" ++ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", ++ csg_id, cs_id, ++ (unsigned int)CS_EXCEPTION_TYPE(fault), ++ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), ++ (unsigned int)CS_EXCEPTION_DATA(fault), ++ info); ++} ++ ++static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 renderpasses_in_flight, pending_frag_count; ++ struct panthor_heap_pool *heaps = NULL; ++ u64 heap_address, new_chunk_va = 0; ++ u32 vt_start, vt_end, frag_end; ++ int ret, csg_id; ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ heaps = panthor_vm_get_heap_pool(group->vm, false); ++ heap_address = cs_iface->output->heap_address; ++ vt_start = cs_iface->output->heap_vt_start; ++ vt_end = cs_iface->output->heap_vt_end; ++ frag_end = cs_iface->output->heap_frag_end; ++ renderpasses_in_flight = vt_start - frag_end; ++ pending_frag_count = vt_end - frag_end; ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* The group got scheduled out, we stop here. We will get a new tiler OOM event ++ * when it's scheduled again. ++ */ ++ if (unlikely(csg_id < 0)) ++ return 0; ++ ++ if (!heaps || frag_end > vt_end || vt_end >= vt_start) { ++ ret = -EINVAL; ++ } else { ++ /* We do the allocation without holding the scheduler lock to avoid ++ * blocking the scheduling. ++ */ ++ ret = panthor_heap_grow(heaps, heap_address, ++ renderpasses_in_flight, ++ pending_frag_count, &new_chunk_va); ++ } ++ ++ if (ret && ret != -EBUSY) { ++ drm_warn(&ptdev->base, "Failed to extend the tiler heap\n"); ++ group->fatal_queues |= BIT(cs_id); ++ sched_queue_delayed_work(sched, tick, 0); ++ goto out_put_heap_pool; ++ } ++ ++ mutex_lock(&sched->lock); ++ csg_id = group->csg_id; ++ if (csg_id >= 0) { ++ struct panthor_fw_csg_iface *csg_iface; ++ struct panthor_fw_cs_iface *cs_iface; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ ++ cs_iface->input->heap_start = new_chunk_va; ++ cs_iface->input->heap_end = new_chunk_va; ++ panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++ } ++ mutex_unlock(&sched->lock); ++ ++ /* We allocated a chunck, but couldn't link it to the heap ++ * context because the group was scheduled out while we were ++ * allocating memory. 
We need to return this chunk to the heap. ++ */ ++ if (unlikely(csg_id < 0 && new_chunk_va)) ++ panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); ++ ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(heaps); ++ return ret; ++} ++ ++static void group_tiler_oom_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, tiler_oom_work); ++ u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); ++ ++ while (tiler_oom) { ++ u32 cs_id = ffs(tiler_oom) - 1; ++ ++ group_process_tiler_oom(group, cs_id); ++ tiler_oom &= ~BIT(cs_id); ++ } ++ ++ group_put(group); ++} ++ ++static void ++cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, !group)) ++ return; ++ ++ atomic_or(BIT(cs_id), &group->tiler_oom); ++ ++ /* We don't use group_queue_work() here because we want to queue the ++ * work item to the heap_alloc_wq. ++ */ ++ group_get(group); ++ if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) ++ group_put(group); ++} ++ ++static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, ++ u32 csg_id, u32 cs_id) ++{ ++ struct panthor_fw_cs_iface *cs_iface; ++ u32 req, ack, events; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); ++ req = cs_iface->input->req; ++ ack = cs_iface->output->ack; ++ events = (req ^ ack) & CS_EVT_MASK; ++ ++ if (events & CS_FATAL) ++ cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_FAULT) ++ cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); ++ ++ if (events & CS_TILER_OOM) ++ cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); ++ ++ /* We don't acknowledge the TILER_OOM event since its handling is ++ * deferred to a separate work. ++ */ ++ panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); ++ ++ return (events & (CS_FAULT | CS_TILER_OOM)) != 0; ++} ++ ++static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; ++} ++ ++static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ sched->might_have_idle_groups = true; ++ ++ /* Schedule a tick so we can evict idle groups and schedule non-idle ++ * ones. This will also update runtime PM and devfreq busy/idle states, ++ * so the device can lower its frequency or get suspended. 
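++ *
++ * might_have_idle_groups is set above because one or more active
++ * groups may have become idle, which is a hint that other runnable
++ * groups could be scheduled in their place (see the field doc in
++ * struct panthor_scheduler).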
++ */ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void csg_slot_sync_update_locked(struct panthor_device *ptdev, ++ u32 csg_id) ++{ ++ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (group) ++ group_queue_work(group, sync_upd); ++ ++ sched_queue_work(ptdev->scheduler, sync_upd); ++} ++ ++static void ++csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ struct panthor_group *group = csg_slot->group; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); ++ ++ group = csg_slot->group; ++ if (!drm_WARN_ON(&ptdev->base, !group)) ++ group->timedout = true; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) ++{ ++ u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 ring_cs_db_mask = 0; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ req = READ_ONCE(csg_iface->input->req); ++ ack = READ_ONCE(csg_iface->output->ack); ++ cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); ++ cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); ++ csg_events = (req ^ ack) & CSG_EVT_MASK; ++ ++ /* There may not be any pending CSG/CS interrupts to process */ ++ if (req == ack && cs_irq_req == cs_irq_ack) ++ return; ++ ++ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before ++ * examining the CS_ACK & CS_REQ bits. This would ensure that Host ++ * doesn't miss an interrupt for the CS in the race scenario where ++ * whilst Host is servicing an interrupt for the CS, firmware sends ++ * another interrupt for that CS. ++ */ ++ csg_iface->input->cs_irq_ack = cs_irq_req; ++ ++ panthor_fw_update_reqs(csg_iface, req, ack, ++ CSG_SYNC_UPDATE | ++ CSG_IDLE | ++ CSG_PROGRESS_TIMER_EVENT); ++ ++ if (csg_events & CSG_IDLE) ++ csg_slot_process_idle_event_locked(ptdev, csg_id); ++ ++ if (csg_events & CSG_PROGRESS_TIMER_EVENT) ++ csg_slot_process_progress_timer_event_locked(ptdev, csg_id); ++ ++ cs_irqs = cs_irq_req ^ cs_irq_ack; ++ while (cs_irqs) { ++ u32 cs_id = ffs(cs_irqs) - 1; ++ ++ if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) ++ ring_cs_db_mask |= BIT(cs_id); ++ ++ cs_irqs &= ~BIT(cs_id); ++ } ++ ++ if (csg_events & CSG_SYNC_UPDATE) ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ if (ring_cs_db_mask) ++ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); ++ ++ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); ++} ++ ++static void sched_process_idle_event_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ /* Acknowledge the idle event and schedule a tick. */ ++ panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++} ++ ++/** ++ * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ ++ * @ptdev: Device. 
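++ *
++ * Only the GLB_IDLE event is handled here for now: it is acknowledged and
++ * a tick is scheduled so idle groups can be evicted.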
++ */ ++static void sched_process_global_irq_locked(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ u32 req, ack, evts; ++ ++ lockdep_assert_held(&ptdev->scheduler->lock); ++ ++ req = READ_ONCE(glb_iface->input->req); ++ ack = READ_ONCE(glb_iface->output->ack); ++ evts = (req ^ ack) & GLB_EVT_MASK; ++ ++ if (evts & GLB_IDLE) ++ sched_process_idle_event_locked(ptdev); ++} ++ ++static void process_fw_events_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ fw_events_work); ++ u32 events = atomic_xchg(&sched->fw_events, 0); ++ struct panthor_device *ptdev = sched->ptdev; ++ ++ mutex_lock(&sched->lock); ++ ++ if (events & JOB_INT_GLOBAL_IF) { ++ sched_process_global_irq_locked(ptdev); ++ events &= ~JOB_INT_GLOBAL_IF; ++ } ++ ++ while (events) { ++ u32 csg_id = ffs(events) - 1; ++ ++ sched_process_csg_irq_locked(ptdev, csg_id); ++ events &= ~BIT(csg_id); ++ } ++ ++ mutex_unlock(&sched->lock); ++} ++ ++/** ++ * panthor_sched_report_fw_events() - Report FW events to the scheduler. ++ */ ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) ++{ ++ if (!ptdev->scheduler) ++ return; ++ ++ atomic_or(events, &ptdev->scheduler->fw_events); ++ sched_queue_work(ptdev->scheduler, fw_events); ++} ++ ++static const char *fence_get_driver_name(struct dma_fence *fence) ++{ ++ return "panthor"; ++} ++ ++static const char *queue_fence_get_timeline_name(struct dma_fence *fence) ++{ ++ return "queue-fence"; ++} ++ ++static const struct dma_fence_ops panthor_queue_fence_ops = { ++ .get_driver_name = fence_get_driver_name, ++ .get_timeline_name = queue_fence_get_timeline_name, ++}; ++ ++/** ++ */ ++struct panthor_csg_slots_upd_ctx { ++ u32 update_mask; ++ u32 timedout_mask; ++ struct { ++ u32 value; ++ u32 mask; ++ } requests[MAX_CSGS]; ++}; ++ ++static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ memset(ctx, 0, sizeof(*ctx)); ++} ++ ++static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx, ++ u32 csg_id, u32 value, u32 mask) ++{ ++ if (drm_WARN_ON(&ptdev->base, !mask) || ++ drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) ++ return; ++ ++ ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); ++ ctx->requests[csg_id].mask |= mask; ++ ctx->update_mask |= BIT(csg_id); ++} ++ ++static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, ++ struct panthor_csg_slots_upd_ctx *ctx) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 update_slots = ctx->update_mask; ++ ++ lockdep_assert_held(&sched->lock); ++ ++ if (!ctx->update_mask) ++ return 0; ++ ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ panthor_fw_update_reqs(csg_iface, req, ++ ctx->requests[csg_id].value, ++ ctx->requests[csg_id].mask); ++ } ++ ++ panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); ++ ++ update_slots = ctx->update_mask; ++ while (update_slots) { ++ struct panthor_fw_csg_iface *csg_iface; ++ u32 csg_id = ffs(update_slots) - 1; ++ u32 req_mask = ctx->requests[csg_id].mask, acked; ++ int ret; ++ ++ update_slots &= ~BIT(csg_id); ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ ++ ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); ++ ++ if (acked & 
CSG_ENDPOINT_CONFIG) ++ csg_slot_sync_priority_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATE_MASK) ++ csg_slot_sync_state_locked(ptdev, csg_id); ++ ++ if (acked & CSG_STATUS_UPDATE) { ++ csg_slot_sync_queues_state_locked(ptdev, csg_id); ++ csg_slot_sync_idle_state_locked(ptdev, csg_id); ++ } ++ ++ if (ret && acked != req_mask && ++ ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { ++ drm_err(&ptdev->base, "CSG %d update request timedout", csg_id); ++ ctx->timedout_mask |= BIT(csg_id); ++ } ++ } ++ ++ if (ctx->timedout_mask) ++ return -ETIMEDOUT; ++ ++ return 0; ++} ++ ++struct panthor_sched_tick_ctx { ++ struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; ++ u32 idle_group_count; ++ u32 group_count; ++ enum panthor_csg_priority min_priority; ++ struct panthor_vm *vms[MAX_CS_PER_CSG]; ++ u32 as_count; ++ bool immediate_tick; ++ u32 csg_upd_failed_mask; ++}; ++ ++static bool ++tick_ctx_is_full(const struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ return ctx->group_count == sched->csg_slot_count; ++} ++ ++static bool ++group_is_idle(struct panthor_group *group) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ u32 inactive_queues; ++ ++ if (group->csg_id >= 0) ++ return ptdev->scheduler->csg_slots[group->csg_id].idle; ++ ++ inactive_queues = group->idle_queues | group->blocked_queues; ++ return hweight32(inactive_queues) == group->queue_count; ++} ++ ++static bool ++group_can_run(struct panthor_group *group) ++{ ++ return group->state != PANTHOR_CS_GROUP_TERMINATED && ++ !group->destroyed && group->fatal_queues == 0 && ++ !group->timedout; ++} ++ ++static void ++tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct list_head *queue, ++ bool skip_idle_groups, ++ bool owned_by_tick_ctx) ++{ ++ struct panthor_group *group, *tmp; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ ++ list_for_each_entry_safe(group, tmp, queue, run_node) { ++ u32 i; ++ ++ if (!group_can_run(group)) ++ continue; ++ ++ if (skip_idle_groups && group_is_idle(group)) ++ continue; ++ ++ for (i = 0; i < ctx->as_count; i++) { ++ if (ctx->vms[i] == group->vm) ++ break; ++ } ++ ++ if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) ++ continue; ++ ++ if (!owned_by_tick_ctx) ++ group_get(group); ++ ++ list_move_tail(&group->run_node, &ctx->groups[group->priority]); ++ ctx->group_count++; ++ if (group_is_idle(group)) ++ ctx->idle_group_count++; ++ ++ if (i == ctx->as_count) ++ ctx->vms[ctx->as_count++] = group->vm; ++ ++ if (ctx->min_priority > group->priority) ++ ctx->min_priority = group->priority; ++ ++ if (tick_ctx_is_full(sched, ctx)) ++ return; ++ } ++} ++ ++static void ++tick_ctx_insert_old_group(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ struct panthor_group *group, ++ bool full_tick) ++{ ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; ++ struct panthor_group *other_group; ++ ++ if (!full_tick) { ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++ return; ++ } ++ ++ /* Rotate to make sure groups with lower CSG slot ++ * priorities have a chance to get a higher CSG slot ++ * priority next time they get picked. This priority ++ * has an impact on resource request ordering, so it's ++ * important to make sure we don't let one group starve ++ * all other groups with the same group priority. 
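++ *
++ * Concretely, the group is inserted before the first entry whose current
++ * slot priority is higher, which keeps old_groups sorted by ascending
++ * slot priority. Since tick_ctx_apply() hands out new slot priorities in
++ * list order, starting from MAX_CSG_PRIO, groups that previously had a
++ * low slot priority tend to be re-programmed with a higher one.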
++ */ ++ list_for_each_entry(other_group, ++ &ctx->old_groups[csg_slot->group->priority], ++ run_node) { ++ struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; ++ ++ if (other_csg_slot->priority > csg_slot->priority) { ++ list_add_tail(&csg_slot->group->run_node, &other_group->run_node); ++ return; ++ } ++ } ++ ++ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); ++} ++ ++static void ++tick_ctx_init(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx, ++ bool full_tick) ++{ ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ u32 i; ++ ++ memset(ctx, 0, sizeof(*ctx)); ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ INIT_LIST_HEAD(&ctx->groups[i]); ++ INIT_LIST_HEAD(&ctx->old_groups[i]); ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ struct panthor_group *group = csg_slot->group; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (!group) ++ continue; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, i); ++ group_get(group); ++ ++ /* If there was unhandled faults on the VM, force processing of ++ * CSG IRQs, so we can flag the faulty queue. ++ */ ++ if (panthor_vm_has_unhandled_faults(group->vm)) { ++ sched_process_csg_irq_locked(ptdev, i); ++ ++ /* No fatal fault reported, flag all queues as faulty. */ ++ if (!group->fatal_queues) ++ group->fatal_queues |= GENMASK(group->queue_count - 1, 0); ++ } ++ ++ tick_ctx_insert_old_group(sched, ctx, group, full_tick); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ csg_iface->output->ack ^ CSG_STATUS_UPDATE, ++ CSG_STATUS_UPDATE); ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ } ++} ++ ++#define NUM_INSTRS_PER_SLOT 16 ++ ++static void ++group_term_post_processing(struct panthor_group *group) ++{ ++ struct panthor_job *job, *tmp; ++ LIST_HEAD(faulty_jobs); ++ bool cookie; ++ u32 i = 0; ++ ++ if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) ++ return; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (i = 0; i < group->queue_count; i++) { ++ struct panthor_queue *queue = group->queues[i]; ++ struct panthor_syncobj_64b *syncobj; ++ int err; ++ ++ if (group->fatal_queues & BIT(i)) ++ err = -EINVAL; ++ else if (group->timedout) ++ err = -ETIMEDOUT; ++ else ++ err = -ECANCELED; ++ ++ if (!queue) ++ continue; ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ list_move_tail(&job->node, &faulty_jobs); ++ dma_fence_set_error(job->done_fence, err); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ /* Manually update the syncobj seqno to unblock waiters. 
*/ ++ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj)); ++ syncobj->status = ~0; ++ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno); ++ sched_queue_work(group->ptdev->scheduler, sync_upd); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, tmp, &faulty_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++} ++ ++static void group_term_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, term_work); ++ ++ group_term_post_processing(group); ++ group_put(group); ++} ++ ++static void ++tick_ctx_cleanup(struct panthor_scheduler *sched, ++ struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ u32 i; ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) { ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { ++ /* If everything went fine, we should only have groups ++ * to be terminated in the old_groups lists. ++ */ ++ drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && ++ group_can_run(group)); ++ ++ if (!group_can_run(group)) { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } else if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { ++ /* If everything went fine, the groups to schedule lists should ++ * be empty. ++ */ ++ drm_WARN_ON(&group->ptdev->base, ++ !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); ++ ++ list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { ++ if (group->csg_id >= 0) { ++ list_del_init(&group->run_node); ++ } else { ++ list_move(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } ++ group_put(group); ++ } ++ } ++} ++ ++static void ++tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) ++{ ++ struct panthor_group *group, *tmp; ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_csg_slot *csg_slot; ++ int prio, new_csg_prio = MAX_CSG_PRIO, i; ++ u32 csg_mod_mask = 0, free_csg_slots = 0; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ int ret; ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ /* Suspend or terminate evicted groups. */ ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ bool term = !group_can_run(group); ++ int csg_id = group->csg_id; ++ ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ continue; ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ ++ /* Update priorities on already running groups. 
*/ ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ struct panthor_fw_csg_iface *csg_iface; ++ int csg_id = group->csg_id; ++ ++ if (csg_id < 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ if (csg_slot->priority == new_csg_prio) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ panthor_fw_update_reqs(csg_iface, endpoint_req, ++ CSG_EP_REQ_PRIORITY(new_csg_prio), ++ CSG_EP_REQ_PRIORITY_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ new_csg_prio--; ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ /* Unbind evicted groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { ++ /* This group is gone. Process interrupts to clear ++ * any pending interrupts before we start the new ++ * group. ++ */ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ } ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ if (!sched->csg_slots[i].group) ++ free_csg_slots |= BIT(i); ++ } ++ ++ csgs_upd_ctx_init(&upd_ctx); ++ new_csg_prio = MAX_CSG_PRIO; ++ ++ /* Start new groups. */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry(group, &ctx->groups[prio], run_node) { ++ int csg_id = group->csg_id; ++ struct panthor_fw_csg_iface *csg_iface; ++ ++ if (csg_id >= 0) { ++ new_csg_prio--; ++ continue; ++ } ++ ++ csg_id = ffs(free_csg_slots) - 1; ++ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) ++ break; ++ ++ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); ++ csg_slot = &sched->csg_slots[csg_id]; ++ csg_mod_mask |= BIT(csg_id); ++ group_bind_locked(group, csg_id); ++ csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ group->state == PANTHOR_CS_GROUP_SUSPENDED ? ++ CSG_STATE_RESUME : CSG_STATE_START, ++ CSG_STATE_MASK); ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, ++ CSG_ENDPOINT_CONFIG); ++ free_csg_slots &= ~BIT(csg_id); ++ } ++ } ++ ++ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ if (ret) { ++ panthor_device_schedule_reset(ptdev); ++ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; ++ return; ++ } ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { ++ list_del_init(&group->run_node); ++ ++ /* If the group has been destroyed while we were ++ * scheduling, ask for an immediate tick to ++ * re-evaluate as soon as possible and get rid of ++ * this dangling group. ++ */ ++ if (group->destroyed) ++ ctx->immediate_tick = true; ++ group_put(group); ++ } ++ ++ /* Return evicted groups to the idle or run queues. Groups ++ * that can no longer be run (because they've been destroyed ++ * or experienced an unrecoverable error) will be scheduled ++ * for destruction in tick_ctx_cleanup(). 
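++ *
++ * Groups are put back on the list matching their current idle state,
++ * so the next tick can pick non-idle groups first.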
++ */ ++ list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { ++ if (!group_can_run(group)) ++ continue; ++ ++ if (group_is_idle(group)) ++ list_move_tail(&group->run_node, &sched->groups.idle[prio]); ++ else ++ list_move_tail(&group->run_node, &sched->groups.runnable[prio]); ++ group_put(group); ++ } ++ } ++ ++ sched->used_csg_slot_count = ctx->group_count; ++ sched->might_have_idle_groups = ctx->idle_group_count > 0; ++} ++ ++static u64 ++tick_ctx_update_resched_target(struct panthor_scheduler *sched, ++ const struct panthor_sched_tick_ctx *ctx) ++{ ++ /* We had space left, no need to reschedule until some external event happens. */ ++ if (!tick_ctx_is_full(sched, ctx)) ++ goto no_tick; ++ ++ /* If idle groups were scheduled, no need to wake up until some external ++ * event happens (group unblocked, new job submitted, ...). ++ */ ++ if (ctx->idle_group_count) ++ goto no_tick; ++ ++ if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) ++ goto no_tick; ++ ++ /* If there are groups of the same priority waiting, we need to ++ * keep the scheduler ticking, otherwise, we'll just wait for ++ * new groups with higher priority to be queued. ++ */ ++ if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { ++ u64 resched_target = sched->last_tick + sched->tick_period; ++ ++ if (time_before64(sched->resched_target, sched->last_tick) || ++ time_before64(resched_target, sched->resched_target)) ++ sched->resched_target = resched_target; ++ ++ return sched->resched_target - sched->last_tick; ++ } ++ ++no_tick: ++ sched->resched_target = U64_MAX; ++ return U64_MAX; ++} ++ ++static void tick_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, ++ tick_work.work); ++ struct panthor_device *ptdev = sched->ptdev; ++ struct panthor_sched_tick_ctx ctx; ++ u64 remaining_jiffies = 0, resched_delay; ++ u64 now = get_jiffies_64(); ++ int prio, ret, cookie; ++ ++ if (!drm_dev_enter(&ptdev->base, &cookie)) ++ return; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ goto out_dev_exit; ++ ++ if (time_before64(now, sched->resched_target)) ++ remaining_jiffies = sched->resched_target - now; ++ ++ mutex_lock(&sched->lock); ++ if (panthor_device_reset_is_pending(sched->ptdev)) ++ goto out_unlock; ++ ++ tick_ctx_init(sched, &ctx, remaining_jiffies != 0); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (remaining_jiffies) { ++ /* Scheduling forced in the middle of a tick. Only RT groups ++ * can preempt non-RT ones. Currently running RT groups can't be ++ * preempted. 
++ */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], ++ true, true); ++ if (prio == PANTHOR_CSG_PRIORITY_RT) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, ++ &sched->groups.runnable[prio], ++ true, false); ++ } ++ } ++ } ++ ++ /* First pick non-idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], ++ true, false); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); ++ } ++ ++ /* If we have free CSG slots left, pick idle groups */ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; ++ prio >= 0 && !tick_ctx_is_full(sched, &ctx); ++ prio--) { ++ /* Check the old_group queue first to avoid reprogramming the slots */ ++ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); ++ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], ++ false, false); ++ } ++ ++ tick_ctx_apply(sched, &ctx); ++ if (ctx.csg_upd_failed_mask) ++ goto out_cleanup_ctx; ++ ++ if (ctx.idle_group_count == ctx.group_count) { ++ panthor_devfreq_record_idle(sched->ptdev); ++ if (sched->pm.has_ref) { ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ sched->pm.has_ref = false; ++ } ++ } else { ++ panthor_devfreq_record_busy(sched->ptdev); ++ if (!sched->pm.has_ref) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ sched->last_tick = now; ++ resched_delay = tick_ctx_update_resched_target(sched, &ctx); ++ if (ctx.immediate_tick) ++ resched_delay = 0; ++ ++ if (resched_delay != U64_MAX) ++ sched_queue_delayed_work(sched, tick, resched_delay); ++ ++out_cleanup_ctx: ++ tick_ctx_cleanup(sched, &ctx); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++} ++ ++static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) ++{ ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ union { ++ struct panthor_syncobj_64b sync64; ++ struct panthor_syncobj_32b sync32; ++ } *syncobj; ++ bool result; ++ u64 value; ++ ++ syncobj = panthor_queue_get_syncwait_obj(group, queue); ++ if (!syncobj) ++ return -EINVAL; ++ ++ value = queue->syncwait.sync64 ? 
++ syncobj->sync64.seqno : ++ syncobj->sync32.seqno; ++ ++ if (queue->syncwait.gt) ++ result = value > queue->syncwait.ref; ++ else ++ result = value <= queue->syncwait.ref; ++ ++ if (result) ++ panthor_queue_put_syncwait_obj(queue); ++ ++ return result; ++} ++ ++static void sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_scheduler *sched = container_of(work, ++ struct panthor_scheduler, ++ sync_upd_work); ++ struct panthor_group *group, *tmp; ++ bool immediate_tick = false; ++ ++ mutex_lock(&sched->lock); ++ list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { ++ u32 tested_queues = group->blocked_queues; ++ u32 unblocked_queues = 0; ++ ++ while (tested_queues) { ++ u32 cs_id = ffs(tested_queues) - 1; ++ int ret; ++ ++ ret = panthor_queue_eval_syncwait(group, cs_id); ++ drm_WARN_ON(&group->ptdev->base, ret < 0); ++ if (ret) ++ unblocked_queues |= BIT(cs_id); ++ ++ tested_queues &= ~BIT(cs_id); ++ } ++ ++ if (unblocked_queues) { ++ group->blocked_queues &= ~unblocked_queues; ++ ++ if (group->csg_id < 0) { ++ list_move(&group->run_node, ++ &sched->groups.runnable[group->priority]); ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) ++ immediate_tick = true; ++ } ++ } ++ ++ if (!group->blocked_queues) ++ list_del_init(&group->wait_node); ++ } ++ mutex_unlock(&sched->lock); ++ ++ if (immediate_tick) ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) ++{ ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct list_head *queue = &sched->groups.runnable[group->priority]; ++ u64 delay_jiffies = 0; ++ bool was_idle; ++ u64 now; ++ ++ if (!group_can_run(group)) ++ return; ++ ++ /* All updated queues are blocked, no need to wake up the scheduler. */ ++ if ((queue_mask & group->blocked_queues) == queue_mask) ++ return; ++ ++ was_idle = group_is_idle(group); ++ group->idle_queues &= ~queue_mask; ++ ++ /* Don't mess up with the lists if we're in a middle of a reset. */ ++ if (atomic_read(&sched->reset.in_progress)) ++ return; ++ ++ if (was_idle && !group_is_idle(group)) ++ list_move_tail(&group->run_node, queue); ++ ++ /* RT groups are preemptive. */ ++ if (group->priority == PANTHOR_CSG_PRIORITY_RT) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Some groups might be idle, force an immediate tick to ++ * re-evaluate. ++ */ ++ if (sched->might_have_idle_groups) { ++ sched_queue_delayed_work(sched, tick, 0); ++ return; ++ } ++ ++ /* Scheduler is ticking, nothing to do. */ ++ if (sched->resched_target != U64_MAX) { ++ /* If there are free slots, force immediating ticking. */ ++ if (sched->used_csg_slot_count < sched->csg_slot_count) ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ return; ++ } ++ ++ /* Scheduler tick was off, recalculate the resched_target based on the ++ * last tick event, and queue the scheduler work. ++ */ ++ now = get_jiffies_64(); ++ sched->resched_target = sched->last_tick + sched->tick_period; ++ if (sched->used_csg_slot_count == sched->csg_slot_count && ++ time_before64(now, sched->resched_target)) ++ delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); ++ ++ sched_queue_delayed_work(sched, tick, delay_jiffies); ++} ++ ++static void queue_stop(struct panthor_queue *queue, ++ struct panthor_job *bad_job) ++{ ++ drm_sched_stop(&queue->scheduler, bad_job ? 
&bad_job->base : NULL); ++} ++ ++static void queue_start(struct panthor_queue *queue) ++{ ++ struct panthor_job *job; ++ ++ /* Re-assign the parent fences. */ ++ list_for_each_entry(job, &queue->scheduler.pending_list, base.list) ++ job->base.s_fence->parent = dma_fence_get(job->done_fence); ++ ++ drm_sched_start(&queue->scheduler, true); ++} ++ ++static void panthor_group_stop(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&sched->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_stop(group->queues[i], NULL); ++ ++ group_get(group); ++ list_move_tail(&group->run_node, &sched->reset.stopped_groups); ++} ++ ++static void panthor_group_start(struct panthor_group *group) ++{ ++ struct panthor_scheduler *sched = group->ptdev->scheduler; ++ ++ lockdep_assert_held(&group->ptdev->scheduler->reset.lock); ++ ++ for (u32 i = 0; i < group->queue_count; i++) ++ queue_start(group->queues[i]); ++ ++ if (group_can_run(group)) { ++ list_move_tail(&group->run_node, ++ group_is_idle(group) ? ++ &sched->groups.idle[group->priority] : ++ &sched->groups.runnable[group->priority]); ++ } else { ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ ++ group_put(group); ++} ++ ++static void panthor_sched_immediate_tick(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ sched_queue_delayed_work(sched, tick, 0); ++} ++ ++/** ++ * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. ++ */ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) ++{ ++ /* Force a tick to immediately kill faulty groups. */ ++ if (ptdev->scheduler) ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_resume(struct panthor_device *ptdev) ++{ ++ /* Force a tick to re-evaluate after a resume. */ ++ panthor_sched_immediate_tick(ptdev); ++} ++ ++void panthor_sched_suspend(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_csg_slots_upd_ctx upd_ctx; ++ u64 suspended_slots, faulty_slots; ++ struct panthor_group *group; ++ u32 i; ++ ++ mutex_lock(&sched->lock); ++ csgs_upd_ctx_init(&upd_ctx); ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ if (csg_slot->group) { ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, ++ CSG_STATE_SUSPEND, ++ CSG_STATE_MASK); ++ } ++ } ++ ++ suspended_slots = upd_ctx.update_mask; ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ suspended_slots &= ~upd_ctx.timedout_mask; ++ faulty_slots = upd_ctx.timedout_mask; ++ ++ if (faulty_slots) { ++ u32 slot_mask = faulty_slots; ++ ++ drm_err(&ptdev->base, "CSG suspend failed, escalating to termination"); ++ csgs_upd_ctx_init(&upd_ctx); ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ ++ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, ++ CSG_STATE_TERMINATE, ++ CSG_STATE_MASK); ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); ++ ++ slot_mask = upd_ctx.timedout_mask; ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ /* Terminate command timedout, but the soft-reset will ++ * automatically terminate all active groups, so let's ++ * force the state to halted here. 
++ */ ++ if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ slot_mask &= ~BIT(csg_id); ++ } ++ } ++ ++ /* Flush L2 and LSC caches to make sure suspend state is up-to-date. ++ * If the flush fails, flag all queues for termination. ++ */ ++ if (suspended_slots) { ++ bool flush_caches_failed = false; ++ u32 slot_mask = suspended_slots; ++ ++ if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) ++ flush_caches_failed = true; ++ ++ while (slot_mask) { ++ u32 csg_id = ffs(slot_mask) - 1; ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; ++ ++ if (flush_caches_failed) ++ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; ++ else ++ csg_slot_sync_update_locked(ptdev, csg_id); ++ ++ slot_mask &= ~BIT(csg_id); ++ } ++ ++ if (flush_caches_failed) ++ faulty_slots |= suspended_slots; ++ } ++ ++ for (i = 0; i < sched->csg_slot_count; i++) { ++ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; ++ ++ group = csg_slot->group; ++ if (!group) ++ continue; ++ ++ group_get(group); ++ ++ if (group->csg_id >= 0) ++ sched_process_csg_irq_locked(ptdev, group->csg_id); ++ ++ group_unbind_locked(group); ++ ++ drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); ++ ++ if (group_can_run(group)) { ++ list_add(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ } else { ++ /* We don't bother stopping the scheduler if the group is ++ * faulty, the group termination work will finish the job. ++ */ ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ group_put(group); ++ } ++ mutex_unlock(&sched->lock); ++} ++ ++void panthor_sched_pre_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ u32 i; ++ ++ mutex_lock(&sched->reset.lock); ++ atomic_set(&sched->reset.in_progress, true); ++ ++ /* Cancel all scheduler works. Once this is done, these works can't be ++ * scheduled again until the reset operation is complete. ++ */ ++ cancel_work_sync(&sched->sync_upd_work); ++ cancel_delayed_work_sync(&sched->tick_work); ++ ++ panthor_sched_suspend(ptdev); ++ ++ /* Stop all groups that might still accept jobs, so we don't get passed ++ * new jobs while we're resetting. ++ */ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { ++ /* All groups should be in the idle lists. */ ++ drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { ++ list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) ++ panthor_group_stop(group); ++ } ++ ++ mutex_unlock(&sched->reset.lock); ++} ++ ++void panthor_sched_post_reset(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group, *group_tmp; ++ ++ mutex_lock(&sched->reset.lock); ++ ++ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) ++ panthor_group_start(group); ++ ++ /* We're done resetting the GPU, clear the reset.in_progress bit so we can ++ * kick the scheduler. 
++ */ ++ atomic_set(&sched->reset.in_progress, false); ++ mutex_unlock(&sched->reset.lock); ++ ++ sched_queue_delayed_work(sched, tick, 0); ++ ++ sched_queue_work(sched, sync_upd); ++} ++ ++static void group_sync_upd_work(struct work_struct *work) ++{ ++ struct panthor_group *group = ++ container_of(work, struct panthor_group, sync_upd_work); ++ struct panthor_job *job, *job_tmp; ++ LIST_HEAD(done_jobs); ++ u32 queue_idx; ++ bool cookie; ++ ++ cookie = dma_fence_begin_signalling(); ++ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { ++ struct panthor_queue *queue = group->queues[queue_idx]; ++ struct panthor_syncobj_64b *syncobj; ++ ++ if (!queue) ++ continue; ++ ++ syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); ++ ++ spin_lock(&queue->fence_ctx.lock); ++ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { ++ if (!job->call_info.size) ++ continue; ++ ++ if (syncobj->seqno < job->done_fence->seqno) ++ break; ++ ++ list_move_tail(&job->node, &done_jobs); ++ dma_fence_signal_locked(job->done_fence); ++ } ++ spin_unlock(&queue->fence_ctx.lock); ++ } ++ dma_fence_end_signalling(cookie); ++ ++ list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { ++ list_del_init(&job->node); ++ panthor_job_put(&job->base); ++ } ++ ++ group_put(group); ++} ++ ++static struct dma_fence * ++queue_run_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); ++ u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); ++ u64 addr_reg = ptdev->csif_info.cs_reg_count - ++ ptdev->csif_info.unpreserved_cs_reg_count; ++ u64 val_reg = addr_reg + 2; ++ u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + ++ job->queue_idx * sizeof(struct panthor_syncobj_64b); ++ u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); ++ struct dma_fence *done_fence; ++ int ret; ++ ++ u64 call_instrs[NUM_INSTRS_PER_SLOT] = { ++ /* MOV32 rX+2, cs.latest_flush */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, ++ ++ /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ ++ (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, ++ ++ /* MOV48 rX:rX+1, cs.start */ ++ (1ull << 56) | (addr_reg << 48) | job->call_info.start, ++ ++ /* MOV32 rX+2, cs.size */ ++ (2ull << 56) | (val_reg << 48) | job->call_info.size, ++ ++ /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ ++ (3ull << 56) | (1 << 16), ++ ++ /* CALL rX:rX+1, rX+2 */ ++ (32ull << 56) | (addr_reg << 40) | (val_reg << 32), ++ ++ /* MOV48 rX:rX+1, sync_addr */ ++ (1ull << 56) | (addr_reg << 48) | sync_addr, ++ ++ /* MOV48 rX+2, #1 */ ++ (1ull << 56) | (val_reg << 48) | 1, ++ ++ /* WAIT(all) */ ++ (3ull << 56) | (waitall_mask << 16), ++ ++ /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ ++ (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, ++ ++ /* ERROR_BARRIER, so we can recover from faults at job ++ * boundaries. ++ */ ++ (47ull << 56), ++ }; ++ ++ /* Need to be cacheline aligned to please the prefetcher. 
*/ ++ static_assert(sizeof(call_instrs) % 64 == 0, ++ "call_instrs is not aligned on a cacheline"); ++ ++ /* Stream size is zero, nothing to do => return a NULL fence and let ++ * drm_sched signal the parent. ++ */ ++ if (!job->call_info.size) ++ return NULL; ++ ++ ret = pm_runtime_resume_and_get(ptdev->base.dev); ++ if (drm_WARN_ON(&ptdev->base, ret)) ++ return ERR_PTR(ret); ++ ++ mutex_lock(&sched->lock); ++ if (!group_can_run(group)) { ++ done_fence = ERR_PTR(-ECANCELED); ++ goto out_unlock; ++ } ++ ++ dma_fence_init(job->done_fence, ++ &panthor_queue_fence_ops, ++ &queue->fence_ctx.lock, ++ queue->fence_ctx.id, ++ atomic64_inc_return(&queue->fence_ctx.seqno)); ++ ++ memcpy(queue->ringbuf->kmap + ringbuf_insert, ++ call_instrs, sizeof(call_instrs)); ++ ++ panthor_job_get(&job->base); ++ spin_lock(&queue->fence_ctx.lock); ++ list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); ++ spin_unlock(&queue->fence_ctx.lock); ++ ++ job->ringbuf.start = queue->iface.input->insert; ++ job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); ++ ++ /* Make sure the ring buffer is updated before the INSERT ++ * register. ++ */ ++ wmb(); ++ ++ queue->iface.input->extract = queue->iface.output->extract; ++ queue->iface.input->insert = job->ringbuf.end; ++ ++ if (group->csg_id < 0) { ++ /* If the queue is blocked, we want to keep the timeout running, so we ++ * can detect unbounded waits and kill the group when that happens. ++ * Otherwise, we suspend the timeout so the time we spend waiting for ++ * a CSG slot is not counted. ++ */ ++ if (!(group->blocked_queues & BIT(job->queue_idx)) && ++ !queue->timeout_suspended) { ++ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); ++ queue->timeout_suspended = true; ++ } ++ ++ group_schedule_locked(group, BIT(job->queue_idx)); ++ } else { ++ gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); ++ if (!sched->pm.has_ref && ++ !(group->blocked_queues & BIT(job->queue_idx))) { ++ pm_runtime_get(ptdev->base.dev); ++ sched->pm.has_ref = true; ++ } ++ } ++ ++ done_fence = dma_fence_get(job->done_fence); ++ ++out_unlock: ++ mutex_unlock(&sched->lock); ++ pm_runtime_mark_last_busy(ptdev->base.dev); ++ pm_runtime_put_autosuspend(ptdev->base.dev); ++ ++ return done_fence; ++} ++ ++static enum drm_gpu_sched_stat ++queue_timedout_job(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ struct panthor_group *group = job->group; ++ struct panthor_device *ptdev = group->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_queue *queue = group->queues[job->queue_idx]; ++ ++ drm_warn(&ptdev->base, "job timeout\n"); ++ ++ drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); ++ ++ queue_stop(queue, job); ++ ++ mutex_lock(&sched->lock); ++ group->timedout = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(ptdev->scheduler, tick, 0); ++ } else { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. 
++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ ++ queue_start(queue); ++ ++ return DRM_GPU_SCHED_STAT_NOMINAL; ++} ++ ++static void queue_free_job(struct drm_sched_job *sched_job) ++{ ++ drm_sched_job_cleanup(sched_job); ++ panthor_job_put(sched_job); ++} ++ ++static const struct drm_sched_backend_ops panthor_queue_sched_ops = { ++ .run_job = queue_run_job, ++ .timedout_job = queue_timedout_job, ++ .free_job = queue_free_job, ++}; ++ ++static struct panthor_queue * ++group_create_queue(struct panthor_group *group, ++ const struct drm_panthor_queue_create *args) ++{ ++ struct drm_gpu_scheduler *drm_sched; ++ struct panthor_queue *queue; ++ int ret; ++ ++ if (args->pad[0] || args->pad[1] || args->pad[2]) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || ++ !is_power_of_2(args->ringbuf_size)) ++ return ERR_PTR(-EINVAL); ++ ++ if (args->priority > CSF_MAX_QUEUE_PRIO) ++ return ERR_PTR(-EINVAL); ++ ++ queue = kzalloc(sizeof(*queue), GFP_KERNEL); ++ if (!queue) ++ return ERR_PTR(-ENOMEM); ++ ++ queue->fence_ctx.id = dma_fence_context_alloc(1); ++ spin_lock_init(&queue->fence_ctx.lock); ++ INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); ++ ++ queue->priority = args->priority; ++ ++ queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm, ++ args->ringbuf_size, ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(queue->ringbuf)) { ++ ret = PTR_ERR(queue->ringbuf); ++ goto err_free_queue; ++ } ++ ++ ret = panthor_kernel_bo_vmap(queue->ringbuf); ++ if (ret) ++ goto err_free_queue; ++ ++ queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev, ++ &queue->iface.input, ++ &queue->iface.output, ++ &queue->iface.input_fw_va, ++ &queue->iface.output_fw_va); ++ if (IS_ERR(queue->iface.mem)) { ++ ret = PTR_ERR(queue->iface.mem); ++ goto err_free_queue; ++ } ++ ++ ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, ++ group->ptdev->scheduler->wq, 1, ++ args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), ++ 0, msecs_to_jiffies(JOB_TIMEOUT_MS), ++ group->ptdev->reset.wq, ++ NULL, "panthor-queue", group->ptdev->base.dev); ++ if (ret) ++ goto err_free_queue; ++ ++ drm_sched = &queue->scheduler; ++ ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); ++ ++ return queue; ++ ++err_free_queue: ++ group_free_queue(group, queue); ++ return ERR_PTR(ret); ++} ++ ++#define MAX_GROUPS_PER_POOL 128 ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args) ++{ ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); ++ struct panthor_group *group = NULL; ++ u32 gid, i, suspend_size; ++ int ret; ++ ++ if (group_args->pad) ++ return -EINVAL; ++ ++ if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) ++ return -EINVAL; ++ ++ if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || ++ (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) ++ return -EINVAL; ++ ++ if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || 
++ hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || ++ hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores) ++ return -EINVAL; ++ ++ group = kzalloc(sizeof(*group), GFP_KERNEL); ++ if (!group) ++ return -ENOMEM; ++ ++ spin_lock_init(&group->fatal_lock); ++ kref_init(&group->refcount); ++ group->state = PANTHOR_CS_GROUP_CREATED; ++ group->csg_id = -1; ++ ++ group->ptdev = ptdev; ++ group->max_compute_cores = group_args->max_compute_cores; ++ group->compute_core_mask = group_args->compute_core_mask; ++ group->max_fragment_cores = group_args->max_fragment_cores; ++ group->fragment_core_mask = group_args->fragment_core_mask; ++ group->max_tiler_cores = group_args->max_tiler_cores; ++ group->tiler_core_mask = group_args->tiler_core_mask; ++ group->priority = group_args->priority; ++ ++ INIT_LIST_HEAD(&group->wait_node); ++ INIT_LIST_HEAD(&group->run_node); ++ INIT_WORK(&group->term_work, group_term_work); ++ INIT_WORK(&group->sync_upd_work, group_sync_upd_work); ++ INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); ++ INIT_WORK(&group->release_work, group_release_work); ++ ++ group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); ++ if (!group->vm) { ++ ret = -EINVAL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->suspend_size; ++ group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->suspend_buf)) { ++ ret = PTR_ERR(group->suspend_buf); ++ group->suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ suspend_size = csg_iface->control->protm_suspend_size; ++ group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); ++ if (IS_ERR(group->protm_suspend_buf)) { ++ ret = PTR_ERR(group->protm_suspend_buf); ++ group->protm_suspend_buf = NULL; ++ goto err_put_group; ++ } ++ ++ group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm, ++ group_args->queues.count * ++ sizeof(struct panthor_syncobj_64b), ++ DRM_PANTHOR_BO_NO_MMAP, ++ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | ++ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, ++ PANTHOR_VM_KERNEL_AUTO_VA); ++ if (IS_ERR(group->syncobjs)) { ++ ret = PTR_ERR(group->syncobjs); ++ goto err_put_group; ++ } ++ ++ ret = panthor_kernel_bo_vmap(group->syncobjs); ++ if (ret) ++ goto err_put_group; ++ ++ memset(group->syncobjs->kmap, 0, ++ group_args->queues.count * sizeof(struct panthor_syncobj_64b)); ++ ++ for (i = 0; i < group_args->queues.count; i++) { ++ group->queues[i] = group_create_queue(group, &queue_args[i]); ++ if (IS_ERR(group->queues[i])) { ++ ret = PTR_ERR(group->queues[i]); ++ group->queues[i] = NULL; ++ goto err_put_group; ++ } ++ ++ group->queue_count++; ++ } ++ ++ group->idle_queues = GENMASK(group->queue_count - 1, 0); ++ ++ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); ++ if (ret) ++ goto err_put_group; ++ ++ mutex_lock(&sched->reset.lock); ++ if (atomic_read(&sched->reset.in_progress)) { ++ panthor_group_stop(group); ++ } else { ++ mutex_lock(&sched->lock); ++ list_add_tail(&group->run_node, ++ &sched->groups.idle[group->priority]); ++ mutex_unlock(&sched->lock); ++ } ++ mutex_unlock(&sched->reset.lock); ++ ++ return gid; ++ ++err_put_group: ++ group_put(group); ++ return ret; ++} ++ ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ group = 
xa_erase(&gpool->xa, group_handle); ++ if (!group) ++ return -EINVAL; ++ ++ for (u32 i = 0; i < group->queue_count; i++) { ++ if (group->queues[i]) ++ drm_sched_entity_destroy(&group->queues[i]->entity); ++ } ++ ++ mutex_lock(&sched->reset.lock); ++ mutex_lock(&sched->lock); ++ group->destroyed = true; ++ if (group->csg_id >= 0) { ++ sched_queue_delayed_work(sched, tick, 0); ++ } else if (!atomic_read(&sched->reset.in_progress)) { ++ /* Remove from the run queues, so the scheduler can't ++ * pick the group on the next tick. ++ */ ++ list_del_init(&group->run_node); ++ list_del_init(&group->wait_node); ++ group_queue_work(group, term); ++ } ++ mutex_unlock(&sched->lock); ++ mutex_unlock(&sched->reset.lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_device *ptdev = pfile->ptdev; ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ struct panthor_group *group; ++ ++ if (get_state->pad) ++ return -EINVAL; ++ ++ group = group_get(xa_load(&gpool->xa, get_state->group_handle)); ++ if (!group) ++ return -EINVAL; ++ ++ memset(get_state, 0, sizeof(*get_state)); ++ ++ mutex_lock(&sched->lock); ++ if (group->timedout) ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; ++ if (group->fatal_queues) { ++ get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; ++ get_state->fatal_queues = group->fatal_queues; ++ } ++ mutex_unlock(&sched->lock); ++ ++ group_put(group); ++ return 0; ++} ++ ++int panthor_group_pool_create(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool; ++ ++ gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); ++ if (!gpool) ++ return -ENOMEM; ++ ++ xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1); ++ pfile->groups = gpool; ++ return 0; ++} ++ ++void panthor_group_pool_destroy(struct panthor_file *pfile) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct panthor_group *group; ++ unsigned long i; ++ ++ if (IS_ERR_OR_NULL(gpool)) ++ return; ++ ++ xa_for_each(&gpool->xa, i, group) ++ panthor_group_destroy(pfile, i); ++ ++ xa_destroy(&gpool->xa); ++ kfree(gpool); ++ pfile->groups = NULL; ++} ++ ++static void job_release(struct kref *ref) ++{ ++ struct panthor_job *job = container_of(ref, struct panthor_job, refcount); ++ ++ drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); ++ ++ if (job->base.s_fence) ++ drm_sched_job_cleanup(&job->base); ++ ++ if (job->done_fence && job->done_fence->ops) ++ dma_fence_put(job->done_fence); ++ else ++ dma_fence_free(job->done_fence); ++ ++ group_put(job->group); ++ ++ kfree(job); ++} ++ ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) ++{ ++ if (sched_job) { ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ kref_get(&job->refcount); ++ } ++ ++ return sched_job; ++} ++ ++void panthor_job_put(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ if (sched_job) ++ kref_put(&job->refcount, job_release); ++} ++ ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ return job->group->vm; ++} ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit) ++{ ++ struct panthor_group_pool *gpool = pfile->groups; ++ struct 
panthor_job *job; ++ int ret; ++ ++ if (qsubmit->pad) ++ return ERR_PTR(-EINVAL); ++ ++ /* If stream_addr is zero, so stream_size should be. */ ++ if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) ++ return ERR_PTR(-EINVAL); ++ ++ /* Make sure the address is aligned on 64-byte (cacheline) and the size is ++ * aligned on 8-byte (instruction size). ++ */ ++ if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7)) ++ return ERR_PTR(-EINVAL); ++ ++ /* bits 24:30 must be zero. */ ++ if (qsubmit->latest_flush & GENMASK(30, 24)) ++ return ERR_PTR(-EINVAL); ++ ++ job = kzalloc(sizeof(*job), GFP_KERNEL); ++ if (!job) ++ return ERR_PTR(-ENOMEM); ++ ++ kref_init(&job->refcount); ++ job->queue_idx = qsubmit->queue_index; ++ job->call_info.size = qsubmit->stream_size; ++ job->call_info.start = qsubmit->stream_addr; ++ job->call_info.latest_flush = qsubmit->latest_flush; ++ INIT_LIST_HEAD(&job->node); ++ ++ job->group = group_get(xa_load(&gpool->xa, group_handle)); ++ if (!job->group) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ if (job->queue_idx >= job->group->queue_count || ++ !job->group->queues[job->queue_idx]) { ++ ret = -EINVAL; ++ goto err_put_job; ++ } ++ ++ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); ++ if (!job->done_fence) { ++ ret = -ENOMEM; ++ goto err_put_job; ++ } ++ ++ ret = drm_sched_job_init(&job->base, ++ &job->group->queues[job->queue_idx]->entity, ++ 1, job->group); ++ if (ret) ++ goto err_put_job; ++ ++ return &job->base; ++ ++err_put_job: ++ panthor_job_put(&job->base); ++ return ERR_PTR(ret); ++} ++ ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job) ++{ ++ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); ++ ++ /* Still not sure why we want USAGE_WRITE for external objects, since I ++ * was assuming this would be handled through explicit syncs being imported ++ * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers ++ * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. 
++ */ ++ panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished, ++ DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); ++} ++ ++void panthor_sched_unplug(struct panthor_device *ptdev) ++{ ++ struct panthor_scheduler *sched = ptdev->scheduler; ++ ++ cancel_delayed_work_sync(&sched->tick_work); ++ ++ mutex_lock(&sched->lock); ++ if (sched->pm.has_ref) { ++ pm_runtime_put(ptdev->base.dev); ++ sched->pm.has_ref = false; ++ } ++ mutex_unlock(&sched->lock); ++} ++ ++static void panthor_sched_fini(struct drm_device *ddev, void *res) ++{ ++ struct panthor_scheduler *sched = res; ++ int prio; ++ ++ if (!sched || !sched->csg_slot_count) ++ return; ++ ++ cancel_delayed_work_sync(&sched->tick_work); ++ ++ if (sched->wq) ++ destroy_workqueue(sched->wq); ++ ++ if (sched->heap_alloc_wq) ++ destroy_workqueue(sched->heap_alloc_wq); ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio])); ++ drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio])); ++ } ++ ++ drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting)); ++} ++ ++int panthor_sched_init(struct panthor_device *ptdev) ++{ ++ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); ++ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); ++ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0); ++ struct panthor_scheduler *sched; ++ u32 gpu_as_count, num_groups; ++ int prio, ret; ++ ++ sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL); ++ if (!sched) ++ return -ENOMEM; ++ ++ /* The highest bit in JOB_INT_* is reserved for globabl IRQs. That ++ * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here. ++ */ ++ num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num); ++ ++ /* The FW-side scheduler might deadlock if two groups with the same ++ * priority try to access a set of resources that overlaps, with part ++ * of the resources being allocated to one group and the other part to ++ * the other group, both groups waiting for the remaining resources to ++ * be allocated. To avoid that, it is recommended to assign each CSG a ++ * different priority. In theory we could allow several groups to have ++ * the same CSG priority if they don't request the same resources, but ++ * that makes the scheduling logic more complicated, so let's clamp ++ * the number of CSG slots to MAX_CSG_PRIO + 1 for now. ++ */ ++ num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups); ++ ++ /* We need at least one AS for the MCU and one for the GPU contexts. 
*/ ++ gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1)); ++ if (!gpu_as_count) { ++ drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)", ++ gpu_as_count + 1); ++ return -EINVAL; ++ } ++ ++ sched->ptdev = ptdev; ++ sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features); ++ sched->csg_slot_count = num_groups; ++ sched->cs_slot_count = csg_iface->control->stream_num; ++ sched->as_slot_count = gpu_as_count; ++ ptdev->csif_info.csg_slot_count = sched->csg_slot_count; ++ ptdev->csif_info.cs_slot_count = sched->cs_slot_count; ++ ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count; ++ ++ sched->last_tick = 0; ++ sched->resched_target = U64_MAX; ++ sched->tick_period = msecs_to_jiffies(10); ++ INIT_DELAYED_WORK(&sched->tick_work, tick_work); ++ INIT_WORK(&sched->sync_upd_work, sync_upd_work); ++ INIT_WORK(&sched->fw_events_work, process_fw_events_work); ++ ++ ret = drmm_mutex_init(&ptdev->base, &sched->lock); ++ if (ret) ++ return ret; ++ ++ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { ++ INIT_LIST_HEAD(&sched->groups.runnable[prio]); ++ INIT_LIST_HEAD(&sched->groups.idle[prio]); ++ } ++ INIT_LIST_HEAD(&sched->groups.waiting); ++ ++ ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock); ++ if (ret) ++ return ret; ++ ++ INIT_LIST_HEAD(&sched->reset.stopped_groups); ++ ++ /* sched->heap_alloc_wq will be used for heap chunk allocation on ++ * tiler OOM events, which means we can't use the same workqueue for ++ * the scheduler because works queued by the scheduler are in ++ * the dma-signalling path. Allocate a dedicated heap_alloc_wq to ++ * work around this limitation. ++ * ++ * FIXME: Ultimately, what we need is a failable/non-blocking GEM ++ * allocation path that we can call when a heap OOM is reported. The ++ * FW is smart enough to fall back on other methods if the kernel can't ++ * allocate memory, and fail the tiling job if none of these ++ * countermeasures worked. ++ * ++ * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the ++ * system is running out of memory. ++ */ ++ sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0); ++ sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); ++ if (!sched->wq || !sched->heap_alloc_wq) { ++ panthor_sched_fini(&ptdev->base, sched); ++ drm_err(&ptdev->base, "Failed to allocate the workqueues"); ++ return -ENOMEM; ++ } ++ ++ ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched); ++ if (ret) ++ return ret; ++ ++ ptdev->scheduler = sched; ++ return 0; ++} +diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h +new file mode 100644 +index 000000000000..66438b1f331f +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_sched.h +@@ -0,0 +1,50 @@ ++/* SPDX-License-Identifier: GPL-2.0 or MIT */ ++/* Copyright 2023 Collabora ltd. 
*/ ++ ++#ifndef __PANTHOR_SCHED_H__ ++#define __PANTHOR_SCHED_H__ ++ ++struct drm_exec; ++struct dma_fence; ++struct drm_file; ++struct drm_gem_object; ++struct drm_sched_job; ++struct drm_panthor_group_create; ++struct drm_panthor_queue_create; ++struct drm_panthor_group_get_state; ++struct drm_panthor_queue_submit; ++struct panthor_device; ++struct panthor_file; ++struct panthor_group_pool; ++struct panthor_job; ++ ++int panthor_group_create(struct panthor_file *pfile, ++ const struct drm_panthor_group_create *group_args, ++ const struct drm_panthor_queue_create *queue_args); ++int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); ++int panthor_group_get_state(struct panthor_file *pfile, ++ struct drm_panthor_group_get_state *get_state); ++ ++struct drm_sched_job * ++panthor_job_create(struct panthor_file *pfile, ++ u16 group_handle, ++ const struct drm_panthor_queue_submit *qsubmit); ++struct drm_sched_job *panthor_job_get(struct drm_sched_job *job); ++struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job); ++void panthor_job_put(struct drm_sched_job *job); ++void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); ++ ++int panthor_group_pool_create(struct panthor_file *pfile); ++void panthor_group_pool_destroy(struct panthor_file *pfile); ++ ++int panthor_sched_init(struct panthor_device *ptdev); ++void panthor_sched_unplug(struct panthor_device *ptdev); ++void panthor_sched_pre_reset(struct panthor_device *ptdev); ++void panthor_sched_post_reset(struct panthor_device *ptdev); ++void panthor_sched_suspend(struct panthor_device *ptdev); ++void panthor_sched_resume(struct panthor_device *ptdev); ++ ++void panthor_sched_report_mmu_fault(struct panthor_device *ptdev); ++void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events); ++ ++#endif +-- +2.42.0 + + +From 7fab106779a2d2edb2465e6f8ca8d3304feb6fcb Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:25 +0100 +Subject: [PATCH 13/71] [MERGED] drm/panthor: Add the driver frontend block + +This is the last piece missing to expose the driver to the outside +world. + +This is basically a wrapper between the ioctls and the other logical +blocks. + +v6: +- Add Maxime's and Heiko's acks +- Return a page-aligned BO size to userspace +- Keep header inclusion alphabetically ordered + +v5: +- Account for the drm_exec_init() prototype change +- Include platform_device.h + +v4: +- Add an ioctl to let the UMD query the VM state +- Fix kernel doc +- Let panthor_device_init() call panthor_device_init() +- Fix cleanup ordering in the panthor_init() error path +- Add Steve's and Liviu's R-b + +v3: +- Add acks for the MIT/GPL2 relicensing +- Fix 32-bit support +- Account for panthor_vm and panthor_sched changes +- Simplify the resv preparation/update logic +- Use a linked list rather than xarray for list of signals. +- Simplify panthor_get_uobj_array by returning the newly allocated + array. +- Drop the "DOC" for job submission helpers and move the relevant + comments to panthor_ioctl_group_submit(). +- Add helpers sync_op_is_signal()/sync_op_is_wait(). +- Simplify return type of panthor_submit_ctx_add_sync_signal() and + panthor_submit_ctx_get_sync_signal(). +- Drop WARN_ON from panthor_submit_ctx_add_job(). +- Fix typos in comments. 
+ +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Reviewed-by: Liviu Dudau +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-12-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/panthor/panthor_drv.c | 1473 +++++++++++++++++++++++++ + 1 file changed, 1473 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/panthor_drv.c + +diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c +new file mode 100644 +index 000000000000..ff484506229f +--- /dev/null ++++ b/drivers/gpu/drm/panthor/panthor_drv.c +@@ -0,0 +1,1473 @@ ++// SPDX-License-Identifier: GPL-2.0 or MIT ++/* Copyright 2018 Marty E. Plummer */ ++/* Copyright 2019 Linaro, Ltd., Rob Herring */ ++/* Copyright 2019 Collabora ltd. */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "panthor_device.h" ++#include "panthor_fw.h" ++#include "panthor_gem.h" ++#include "panthor_gpu.h" ++#include "panthor_heap.h" ++#include "panthor_mmu.h" ++#include "panthor_regs.h" ++#include "panthor_sched.h" ++ ++/** ++ * DOC: user <-> kernel object copy helpers. ++ */ ++ ++/** ++ * panthor_set_uobj() - Copy kernel object to user object. ++ * @usr_ptr: Users pointer. ++ * @usr_size: Size of the user object. ++ * @min_size: Minimum size for this object. ++ * @kern_size: Size of the kernel object. ++ * @in: Address of the kernel object to copy. ++ * ++ * Helper automating kernel -> user object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_SET() instead. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in) ++{ ++ /* User size shouldn't be smaller than the minimal object size. */ ++ if (usr_size < min_size) ++ return -EINVAL; ++ ++ if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size))) ++ return -EFAULT; ++ ++ /* When the kernel object is smaller than the user object, we fill the gap with ++ * zeros. ++ */ ++ if (usr_size > kern_size && ++ clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) { ++ return -EFAULT; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array. ++ * @in: The object array to copy. ++ * @min_stride: Minimum array stride. ++ * @obj_size: Kernel object size. ++ * ++ * Helper automating user -> kernel object copies. ++ * ++ * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead. ++ * ++ * Return: newly allocated object array or an ERR_PTR on error. ++ */ ++static void * ++panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, ++ u32 obj_size) ++{ ++ int ret = 0; ++ void *out_alloc; ++ ++ /* User stride must be at least the minimum object size, otherwise it might ++ * lack useful information. 
++ */ ++ if (in->stride < min_stride) ++ return ERR_PTR(-EINVAL); ++ ++ if (!in->count) ++ return NULL; ++ ++ out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); ++ if (!out_alloc) ++ return ERR_PTR(-ENOMEM); ++ ++ if (obj_size == in->stride) { ++ /* Fast path when user/kernel have the same uAPI header version. */ ++ if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), ++ (unsigned long)obj_size * in->count)) ++ ret = -EFAULT; ++ } else { ++ void __user *in_ptr = u64_to_user_ptr(in->array); ++ void *out_ptr = out_alloc; ++ ++ /* If the sizes differ, we need to copy elements one by one. */ ++ for (u32 i = 0; i < in->count; i++) { ++ ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); ++ if (ret) ++ break; ++ ++ out_ptr += obj_size; ++ in_ptr += in->stride; ++ } ++ } ++ ++ if (ret) { ++ kvfree(out_alloc); ++ return ERR_PTR(ret); ++ } ++ ++ return out_alloc; ++} ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Get the minimum user object size based on the last mandatory field name, ++ * A.K.A, the name of the last field of the structure at the time this ++ * structure was added to the uAPI. ++ * ++ * Don't use directly, use PANTHOR_UOBJ_DECL() instead. ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \ ++ (offsetof(_typename, _last_mandatory_field) + \ ++ sizeof(((_typename *)NULL)->_last_mandatory_field)) ++ ++/** ++ * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to ++ * evolutions. ++ * @_typename: Object type. ++ * @_last_mandatory_field: Last mandatory field. ++ * ++ * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list. ++ */ ++#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \ ++ _typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) ++ ++/** ++ * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object ++ * @_obj_name: Object to get the minimum size of. ++ * ++ * Don't use this macro directly, it's automatically called by ++ * PANTHOR_UOBJ_{SET,GET_ARRAY}(). ++ */ ++#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \ ++ _Generic(_obj_name, \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ ++ PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) ++ ++/** ++ * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. ++ * @_dest_usr_ptr: User pointer to copy to. ++ * @_usr_size: Size of the user object. ++ * @_src_obj: Kernel object to copy (not a pointer). ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ ++ panthor_set_uobj(_dest_usr_ptr, _usr_size, \ ++ PANTHOR_UOBJ_MIN_SIZE(_src_obj), \ ++ sizeof(_src_obj), &(_src_obj)) ++ ++/** ++ * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible ++ * object array. ++ * @_dest_array: Local variable that will hold the newly allocated kernel ++ * object array. ++ * @_uobj_array: The drm_panthor_obj_array object describing the user object ++ * array. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \ ++ ({ \ ++ typeof(_dest_array) _tmp; \ ++ _tmp = panthor_get_uobj_array(_uobj_array, \ ++ PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \ ++ sizeof((_dest_array)[0])); \ ++ if (!IS_ERR(_tmp)) \ ++ _dest_array = _tmp; \ ++ PTR_ERR_OR_ZERO(_tmp); \ ++ }) ++ ++/** ++ * struct panthor_sync_signal - Represent a synchronization object point to attach ++ * our job fence to. ++ * ++ * This structure is here to keep track of fences that are currently bound to ++ * a specific syncobj point. ++ * ++ * At the beginning of a job submission, the fence ++ * is retrieved from the syncobj itself, and can be NULL if no fence was attached ++ * to this point. ++ * ++ * At the end, it points to the fence of the last job that had a ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj. ++ * ++ * With jobs being submitted in batches, the fence might change several times during ++ * the process, allowing one job to wait on a job that's part of the same submission ++ * but appears earlier in the drm_panthor_group_submit::queue_submits array. ++ */ ++struct panthor_sync_signal { ++ /** @node: list_head to track signal ops within a submit operation */ ++ struct list_head node; ++ ++ /** @handle: The syncobj handle. */ ++ u32 handle; ++ ++ /** ++ * @point: The syncobj point. ++ * ++ * Zero for regular syncobjs, and non-zero for timeline syncobjs. ++ */ ++ u64 point; ++ ++ /** ++ * @syncobj: The sync object pointed by @handle. ++ */ ++ struct drm_syncobj *syncobj; ++ ++ /** ++ * @chain: Chain object used to link the new fence to an existing ++ * timeline syncobj. ++ * ++ * NULL for regular syncobj, non-NULL for timeline syncobjs. ++ */ ++ struct dma_fence_chain *chain; ++ ++ /** ++ * @fence: The fence to assign to the syncobj or syncobj-point. ++ */ ++ struct dma_fence *fence; ++}; ++ ++/** ++ * struct panthor_job_ctx - Job context ++ */ ++struct panthor_job_ctx { ++ /** @job: The job that is about to be submitted to drm_sched. */ ++ struct drm_sched_job *job; ++ ++ /** @syncops: Array of sync operations. */ ++ struct drm_panthor_sync_op *syncops; ++ ++ /** @syncop_count: Number of sync operations. */ ++ u32 syncop_count; ++}; ++ ++/** ++ * struct panthor_submit_ctx - Submission context ++ * ++ * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or ++ * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the ++ * initialization and cleanup steps. ++ */ ++struct panthor_submit_ctx { ++ /** @file: DRM file this submission happens on. */ ++ struct drm_file *file; ++ ++ /** ++ * @signals: List of struct panthor_sync_signal. ++ * ++ * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here, ++ * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry ++ * matching the syncobj+point exists before calling ++ * drm_syncobj_find_fence(). This allows us to describe dependencies ++ * existing between jobs that are part of the same batch. ++ */ ++ struct list_head signals; ++ ++ /** @jobs: Array of jobs. */ ++ struct panthor_job_ctx *jobs; ++ ++ /** @job_count: Number of entries in the @jobs array. */ ++ u32 job_count; ++ ++ /** @exec: drm_exec context used to acquire and prepare resv objects. 
*/ ++ struct drm_exec exec; ++}; ++ ++#define PANTHOR_SYNC_OP_FLAGS_MASK \ ++ (DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL) ++ ++static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op) ++{ ++ return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op) ++{ ++ /* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */ ++ return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); ++} ++ ++/** ++ * panthor_check_sync_op() - Check drm_panthor_sync_op fields ++ * @sync_op: The sync operation to check. ++ * ++ * Return: 0 on success, -EINVAL otherwise. ++ */ ++static int ++panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op) ++{ ++ u8 handle_type; ++ ++ if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK) ++ return -EINVAL; ++ ++ handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK; ++ if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ) ++ return -EINVAL; ++ ++ if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && ++ sync_op->timeline_value != 0) ++ return -EINVAL; ++ ++ return 0; ++} ++ ++/** ++ * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object ++ * @sig_sync: Signal object to free. ++ */ ++static void ++panthor_sync_signal_free(struct panthor_sync_signal *sig_sync) ++{ ++ if (!sig_sync) ++ return; ++ ++ drm_syncobj_put(sig_sync->syncobj); ++ dma_fence_chain_free(sig_sync->chain); ++ dma_fence_put(sig_sync->fence); ++ kfree(sig_sync); ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context ++ * @ctx: Context to add the signal operation to. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, otherwise negative error value. ++ */ ++static int ++panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *cur_fence; ++ int ret; ++ ++ sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); ++ if (!sig_sync) ++ return -ENOMEM; ++ ++ sig_sync->handle = handle; ++ sig_sync->point = point; ++ ++ if (point > 0) { ++ sig_sync->chain = dma_fence_chain_alloc(); ++ if (!sig_sync->chain) { ++ ret = -ENOMEM; ++ goto err_free_sig_sync; ++ } ++ } ++ ++ sig_sync->syncobj = drm_syncobj_find(ctx->file, handle); ++ if (!sig_sync->syncobj) { ++ ret = -EINVAL; ++ goto err_free_sig_sync; ++ } ++ ++ /* Retrieve the current fence attached to that point. It's ++ * perfectly fine to get a NULL fence here, it just means there's ++ * no fence attached to that point yet. ++ */ ++ if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence)) ++ sig_sync->fence = cur_fence; ++ ++ list_add_tail(&sig_sync->node, &ctx->signals); ++ ++ return 0; ++ ++err_free_sig_sync: ++ panthor_sync_signal_free(sig_sync); ++ return ret; ++} ++ ++/** ++ * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a ++ * submit context. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: A valid panthor_sync_signal object if found, NULL otherwise. 
++ */ ++static struct panthor_sync_signal * ++panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (handle == sig_sync->handle && point == sig_sync->point) ++ return sig_sync; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * panthor_submit_ctx_add_job() - Add a job to a submit context ++ * @ctx: Context to search the signal operation in. ++ * @idx: Index of the job in the context. ++ * @job: Job to add. ++ * @syncs: Sync operations provided by userspace. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx, ++ struct drm_sched_job *job, ++ const struct drm_panthor_obj_array *syncs) ++{ ++ int ret; ++ ++ ctx->jobs[idx].job = job; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs); ++ if (ret) ++ return ret; ++ ++ ctx->jobs[idx].syncop_count = syncs->count; ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found. ++ * @ctx: Context to search the signal operation in. ++ * @handle: Syncobj handle. ++ * @point: Syncobj point. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point); ++ if (sig_sync) ++ return 0; ++ ++ return panthor_submit_ctx_add_sync_signal(ctx, handle, point); ++} ++ ++/** ++ * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences ++ * on the signal operations specified by a job. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished; ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct dma_fence *old_fence; ++ struct panthor_sync_signal *sig_sync; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (drm_WARN_ON(&ptdev->base, !sig_sync)) ++ return -EINVAL; ++ ++ old_fence = sig_sync->fence; ++ sig_sync->fence = dma_fence_get(done_fence); ++ dma_fence_put(old_fence); ++ ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations ++ * and add them to the context. ++ * @ctx: Context to search the signal operation in. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. 
++ */ ++static int ++panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ int ret; ++ ++ if (!sync_op_is_signal(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ ret = panthor_submit_ctx_get_sync_signal(ctx, ++ sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push ++ * the currently assigned fence to the associated syncobj. ++ * @ctx: Context to push fences on. ++ * ++ * This is the last step of a submission procedure, and is done once we know the submission ++ * is effective and job fences are guaranteed to be signaled in finite time. ++ */ ++static void ++panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx) ++{ ++ struct panthor_sync_signal *sig_sync; ++ ++ list_for_each_entry(sig_sync, &ctx->signals, node) { ++ if (sig_sync->chain) { ++ drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, ++ sig_sync->fence, sig_sync->point); ++ sig_sync->chain = NULL; ++ } else { ++ drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); ++ } ++ } ++} ++ ++/** ++ * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as ++ * job dependencies. ++ * @ctx: Submit context. ++ * @job_idx: Index of the job to operate on. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx, ++ u32 job_idx) ++{ ++ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, ++ struct panthor_device, ++ base); ++ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; ++ struct drm_sched_job *job = ctx->jobs[job_idx].job; ++ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; ++ int ret = 0; ++ ++ for (u32 i = 0; i < sync_op_count; i++) { ++ struct panthor_sync_signal *sig_sync; ++ struct dma_fence *fence; ++ ++ if (!sync_op_is_wait(&sync_ops[i])) ++ continue; ++ ++ ret = panthor_check_sync_op(&sync_ops[i]); ++ if (ret) ++ return ret; ++ ++ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, ++ sync_ops[i].timeline_value); ++ if (sig_sync) { ++ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) ++ return -EINVAL; ++ ++ fence = dma_fence_get(sig_sync->fence); ++ } else { ++ ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle, ++ sync_ops[i].timeline_value, ++ 0, &fence); ++ if (ret) ++ return ret; ++ } ++ ++ ret = drm_sched_job_add_dependency(job, fence); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations ++ * and add them to the submit context. ++ * @ctx: Submit context. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx) ++{ ++ for (u32 i = 0; i < ctx->job_count; i++) { ++ int ret; ++ ++ ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs ++ * @ctx: Submit context. ++ * ++ * Must be called after the resv preparation has been taken care of. 
++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int ++panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx) ++{ ++ for (u32 i = 0; i < ctx->job_count; i++) { ++ int ret; ++ ++ ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i); ++ if (ret) ++ return ret; ++ ++ drm_sched_job_arm(ctx->jobs[i].job); ++ ++ ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i); ++ if (ret) ++ return ret; ++ } ++ ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities. ++ * @ctx: Submit context. ++ * @upd_resvs: Callback used to update reservation objects that were previously ++ * preapred. ++ */ ++static void ++panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx, ++ void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *)) ++{ ++ for (u32 i = 0; i < ctx->job_count; i++) { ++ upd_resvs(&ctx->exec, ctx->jobs[i].job); ++ drm_sched_entity_push_job(ctx->jobs[i].job); ++ ++ /* Job is owned by the scheduler now. */ ++ ctx->jobs[i].job = NULL; ++ } ++ ++ panthor_submit_ctx_push_fences(ctx); ++} ++ ++/** ++ * panthor_submit_ctx_init() - Initializes a submission context ++ * @ctx: Submit context to initialize. ++ * @file: drm_file this submission happens on. ++ * @job_count: Number of jobs that will be submitted. ++ * ++ * Return: 0 on success, a negative error code otherwise. ++ */ ++static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx, ++ struct drm_file *file, u32 job_count) ++{ ++ ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs), ++ GFP_KERNEL | __GFP_ZERO); ++ if (!ctx->jobs) ++ return -ENOMEM; ++ ++ ctx->file = file; ++ ctx->job_count = job_count; ++ INIT_LIST_HEAD(&ctx->signals); ++ drm_exec_init(&ctx->exec, ++ DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, ++ 0); ++ return 0; ++} ++ ++/** ++ * panthor_submit_ctx_cleanup() - Cleanup a submission context ++ * @ctx: Submit context to cleanup. ++ * @job_put: Job put callback. 
++ */ ++static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, ++ void (*job_put)(struct drm_sched_job *)) ++{ ++ struct panthor_sync_signal *sig_sync, *tmp; ++ unsigned long i; ++ ++ drm_exec_fini(&ctx->exec); ++ ++ list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node) ++ panthor_sync_signal_free(sig_sync); ++ ++ for (i = 0; i < ctx->job_count; i++) { ++ job_put(ctx->jobs[i].job); ++ kvfree(ctx->jobs[i].syncops); ++ } ++ ++ kvfree(ctx->jobs); ++} ++ ++static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct drm_panthor_dev_query *args = data; ++ ++ if (!args->pointer) { ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ args->size = sizeof(ptdev->gpu_info); ++ return 0; ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ args->size = sizeof(ptdev->csif_info); ++ return 0; ++ ++ default: ++ return -EINVAL; ++ } ++ } ++ ++ switch (args->type) { ++ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info); ++ ++ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: ++ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); ++ ++ default: ++ return -EINVAL; ++ } ++} ++ ++#define PANTHOR_VM_CREATE_FLAGS 0 ++ ++static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_create *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = panthor_vm_pool_create_vm(ptdev, pfile->vms, args); ++ if (ret >= 0) { ++ args->id = ret; ++ ret = 0; ++ } ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_vm_pool_destroy_vm(pfile->vms, args->id); ++} ++ ++#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP ++ ++static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_bo_create *args = data; ++ struct panthor_vm *vm = NULL; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (!args->size || args->pad || ++ (args->flags & ~PANTHOR_BO_FLAGS)) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ ++ if (args->exclusive_vm_id) { ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); ++ if (!vm) { ++ ret = -EINVAL; ++ goto out_dev_exit; ++ } ++ } ++ ++ ret = panthor_gem_create_with_handle(file, ddev, vm, &args->size, ++ args->flags, &args->handle); ++ ++ panthor_vm_put(vm); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_bo_mmap_offset *args = data; ++ struct drm_gem_object *obj; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ obj = drm_gem_object_lookup(file, args->handle); ++ if (!obj) ++ return -ENOENT; ++ ++ ret = drm_gem_create_mmap_offset(obj); ++ if (ret) ++ goto out; ++ ++ args->offset = drm_vma_node_offset_addr(&obj->vma_node); ++ ++out: ++ drm_gem_object_put(obj); ++ return ret; 
++} ++ ++static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_submit *args = data; ++ struct drm_panthor_queue_submit *jobs_args; ++ struct panthor_submit_ctx ctx; ++ int ret = 0, cookie; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits); ++ if (ret) ++ goto out_dev_exit; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ /* Create jobs and attach sync operations */ ++ for (u32 i = 0; i < args->queue_submits.count; i++) { ++ const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_job_create(pfile, args->group_handle, qsubmit); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Collect signal operations on all jobs, such that each job can pick ++ * from it for its dependencies and update the fence to signal when the ++ * job is submitted. ++ */ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* ++ * We acquire/prepare revs on all jobs before proceeding with the ++ * dependency registration. ++ * ++ * This is solving two problems: ++ * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be ++ * protected by a lock to make sure no concurrent access to the same ++ * entity get interleaved, which would mess up with the fence seqno ++ * ordering. Luckily, one of the resv being acquired is the VM resv, ++ * and a scheduling entity is only bound to a single VM. As soon as ++ * we acquire the VM resv, we should be safe. ++ * 2. Jobs might depend on fences that were issued by previous jobs in ++ * the same batch, so we can't add dependencies on all jobs before ++ * arming previous jobs and registering the fence to the signal ++ * array, otherwise we might miss dependencies, or point to an ++ * outdated fence. ++ */ ++ if (args->queue_submits.count > 0) { ++ /* All jobs target the same group, so they also point to the same VM. */ ++ struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job); ++ ++ drm_exec_until_all_locked(&ctx.exec) { ++ ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, ++ args->queue_submits.count); ++ } ++ ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ /* ++ * Now that resvs are locked/prepared, we can iterate over each job to ++ * add the dependencies, arm the job fence, register the job fence to ++ * the signal array. ++ */ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point, so we can make our job fences ++ * visible to the outside world. Push jobs and set the job fences to ++ * the resv slots we reserved. This also pushes the fences to the ++ * syncobjs that are part of the signal array. 
++ */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_dev_exit: ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_destroy *args = data; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ return panthor_group_destroy(pfile, args->group_handle); ++} ++ ++static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_create *args = data; ++ struct drm_panthor_queue_create *queue_args; ++ int ret; ++ ++ if (!args->queues.count) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); ++ if (ret) ++ return ret; ++ ++ ret = panthor_group_create(pfile, args, queue_args); ++ if (ret >= 0) { ++ args->group_handle = ret; ++ ret = 0; ++ } ++ ++ kvfree(queue_args); ++ return ret; ++} ++ ++static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_group_get_state *args = data; ++ ++ return panthor_group_get_state(pfile, args); ++} ++ ++static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_create *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, true); ++ if (IS_ERR(pool)) { ++ ret = PTR_ERR(pool); ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_create(pool, ++ args->initial_chunk_count, ++ args->chunk_size, ++ args->max_chunks, ++ args->target_in_flight, ++ &args->tiler_heap_ctx_gpu_va, ++ &args->first_heap_chunk_gpu_va); ++ if (ret < 0) ++ goto out_put_heap_pool; ++ ++ /* Heap pools are per-VM. We combine the VM and HEAP id to make ++ * a unique heap handle. 
++ */ ++ args->handle = (args->vm_id << 16) | ret; ++ ret = 0; ++ ++out_put_heap_pool: ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_tiler_heap_destroy *args = data; ++ struct panthor_heap_pool *pool; ++ struct panthor_vm *vm; ++ int ret; ++ ++ if (args->pad) ++ return -EINVAL; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16); ++ if (!vm) ++ return -EINVAL; ++ ++ pool = panthor_vm_get_heap_pool(vm, false); ++ if (!pool) { ++ ret = -EINVAL; ++ goto out_put_vm; ++ } ++ ++ ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0)); ++ panthor_heap_pool_put(pool); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_async(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_submit_ctx ctx; ++ struct panthor_vm *vm; ++ int ret = 0; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ ret = panthor_submit_ctx_init(&ctx, file, args->ops.count); ++ if (ret) ++ goto out_free_jobs_args; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ struct drm_panthor_vm_bind_op *op = &jobs_args[i]; ++ struct drm_sched_job *job; ++ ++ job = panthor_vm_bind_job_create(file, vm, op); ++ if (IS_ERR(job)) { ++ ret = PTR_ERR(job); ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ ++ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Prepare reservation objects for each VM_BIND job. */ ++ drm_exec_until_all_locked(&ctx.exec) { ++ for (u32 i = 0; i < ctx.job_count; i++) { ++ ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job); ++ drm_exec_retry_on_contention(&ctx.exec); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ } ++ } ++ ++ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); ++ if (ret) ++ goto out_cleanup_submit_ctx; ++ ++ /* Nothing can fail after that point. */ ++ panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs); ++ ++out_cleanup_submit_ctx: ++ panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put); ++ ++out_free_jobs_args: ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev, ++ struct drm_panthor_vm_bind *args, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_bind_op *jobs_args; ++ struct panthor_vm *vm; ++ int ret; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); ++ if (ret) ++ goto out_put_vm; ++ ++ for (u32 i = 0; i < args->ops.count; i++) { ++ ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]); ++ if (ret) { ++ /* Update ops.count so the user knows where things failed. 
*/ ++ args->ops.count = i; ++ break; ++ } ++ } ++ ++ kvfree(jobs_args); ++ ++out_put_vm: ++ panthor_vm_put(vm); ++ return ret; ++} ++ ++#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC ++ ++static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct drm_panthor_vm_bind *args = data; ++ int cookie, ret; ++ ++ if (!drm_dev_enter(ddev, &cookie)) ++ return -ENODEV; ++ ++ if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC) ++ ret = panthor_ioctl_vm_bind_async(ddev, args, file); ++ else ++ ret = panthor_ioctl_vm_bind_sync(ddev, args, file); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, ++ struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ struct drm_panthor_vm_get_state *args = data; ++ struct panthor_vm *vm; ++ ++ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); ++ if (!vm) ++ return -EINVAL; ++ ++ if (panthor_vm_is_unusable(vm)) ++ args->state = DRM_PANTHOR_VM_STATE_UNUSABLE; ++ else ++ args->state = DRM_PANTHOR_VM_STATE_USABLE; ++ ++ panthor_vm_put(vm); ++ return 0; ++} ++ ++static int ++panthor_open(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); ++ struct panthor_file *pfile; ++ int ret; ++ ++ if (!try_module_get(THIS_MODULE)) ++ return -EINVAL; ++ ++ pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); ++ if (!pfile) { ++ ret = -ENOMEM; ++ goto err_put_mod; ++ } ++ ++ pfile->ptdev = ptdev; ++ ++ ret = panthor_vm_pool_create(pfile); ++ if (ret) ++ goto err_free_file; ++ ++ ret = panthor_group_pool_create(pfile); ++ if (ret) ++ goto err_destroy_vm_pool; ++ ++ file->driver_priv = pfile; ++ return 0; ++ ++err_destroy_vm_pool: ++ panthor_vm_pool_destroy(pfile); ++ ++err_free_file: ++ kfree(pfile); ++ ++err_put_mod: ++ module_put(THIS_MODULE); ++ return ret; ++} ++ ++static void ++panthor_postclose(struct drm_device *ddev, struct drm_file *file) ++{ ++ struct panthor_file *pfile = file->driver_priv; ++ ++ panthor_group_pool_destroy(pfile); ++ panthor_vm_pool_destroy(pfile); ++ ++ kfree(pfile); ++ module_put(THIS_MODULE); ++} ++ ++static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { ++#define PANTHOR_IOCTL(n, func, flags) \ ++ DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags) ++ ++ PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), ++ PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), ++}; ++ ++static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) ++{ ++ struct drm_file *file = filp->private_data; ++ struct panthor_file *pfile = file->driver_priv; ++ struct panthor_device *ptdev = pfile->ptdev; ++ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; ++ int ret, cookie; ++ ++ if 
(!drm_dev_enter(file->minor->dev, &cookie)) ++ return -ENODEV; ++ ++ if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) ++ ret = panthor_device_mmap_io(ptdev, vma); ++ else ++ ret = drm_gem_mmap(filp, vma); ++ ++ drm_dev_exit(cookie); ++ return ret; ++} ++ ++static const struct file_operations panthor_drm_driver_fops = { ++ .open = drm_open, ++ .release = drm_release, ++ .unlocked_ioctl = drm_ioctl, ++ .compat_ioctl = drm_compat_ioctl, ++ .poll = drm_poll, ++ .read = drm_read, ++ .llseek = noop_llseek, ++ .mmap = panthor_mmap, ++}; ++ ++#ifdef CONFIG_DEBUG_FS ++static void panthor_debugfs_init(struct drm_minor *minor) ++{ ++ panthor_mmu_debugfs_init(minor); ++} ++#endif ++ ++/* ++ * PanCSF driver version: ++ * - 1.0 - initial interface ++ */ ++static const struct drm_driver panthor_drm_driver = { ++ .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | ++ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, ++ .open = panthor_open, ++ .postclose = panthor_postclose, ++ .ioctls = panthor_drm_driver_ioctls, ++ .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), ++ .fops = &panthor_drm_driver_fops, ++ .name = "panthor", ++ .desc = "Panthor DRM driver", ++ .date = "20230801", ++ .major = 1, ++ .minor = 0, ++ ++ .gem_create_object = panthor_gem_create_object, ++ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, ++#ifdef CONFIG_DEBUG_FS ++ .debugfs_init = panthor_debugfs_init, ++#endif ++}; ++ ++static int panthor_probe(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev; ++ ++ ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, ++ struct panthor_device, base); ++ if (!ptdev) ++ return -ENOMEM; ++ ++ platform_set_drvdata(pdev, ptdev); ++ ++ return panthor_device_init(ptdev); ++} ++ ++static void panthor_remove(struct platform_device *pdev) ++{ ++ struct panthor_device *ptdev = platform_get_drvdata(pdev); ++ ++ panthor_device_unplug(ptdev); ++} ++ ++static const struct of_device_id dt_match[] = { ++ { .compatible = "rockchip,rk3588-mali" }, ++ { .compatible = "arm,mali-valhall-csf" }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, dt_match); ++ ++static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, ++ panthor_device_suspend, ++ panthor_device_resume, ++ NULL); ++ ++static struct platform_driver panthor_driver = { ++ .probe = panthor_probe, ++ .remove_new = panthor_remove, ++ .driver = { ++ .name = "panthor", ++ .pm = &panthor_pm_ops, ++ .of_match_table = dt_match, ++ }, ++}; ++ ++/* ++ * Workqueue used to cleanup stuff. ++ * ++ * We create a dedicated workqueue so we can drain on unplug and ++ * make sure all resources are freed before the module is unloaded. 
++ */ ++struct workqueue_struct *panthor_cleanup_wq; ++ ++static int __init panthor_init(void) ++{ ++ int ret; ++ ++ ret = panthor_mmu_pt_cache_init(); ++ if (ret) ++ return ret; ++ ++ panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0); ++ if (!panthor_cleanup_wq) { ++ pr_err("panthor: Failed to allocate the workqueues"); ++ ret = -ENOMEM; ++ goto err_mmu_pt_cache_fini; ++ } ++ ++ ret = platform_driver_register(&panthor_driver); ++ if (ret) ++ goto err_destroy_cleanup_wq; ++ ++ return 0; ++ ++err_destroy_cleanup_wq: ++ destroy_workqueue(panthor_cleanup_wq); ++ ++err_mmu_pt_cache_fini: ++ panthor_mmu_pt_cache_fini(); ++ return ret; ++} ++module_init(panthor_init); ++ ++static void __exit panthor_exit(void) ++{ ++ platform_driver_unregister(&panthor_driver); ++ destroy_workqueue(panthor_cleanup_wq); ++ panthor_mmu_pt_cache_fini(); ++} ++module_exit(panthor_exit); ++ ++MODULE_AUTHOR("Panthor Project Developers"); ++MODULE_DESCRIPTION("Panthor DRM Driver"); ++MODULE_LICENSE("Dual MIT/GPL"); +-- +2.42.0 + + +From 434e82fc99c22d6e3780294a5813815d228029e0 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:26 +0100 +Subject: [PATCH 14/71] [MERGED] drm/panthor: Allow driver compilation + +Now that all blocks are available, we can add/update Kconfig/Makefile +files to allow compilation. + +v6: +- Add Maxime's and Heiko's acks +- Keep source files alphabetically ordered in the Makefile + +v4: +- Add Steve's R-b + +v3: +- Add a dep on DRM_GPUVM +- Fix dependencies in Kconfig +- Expand help text to (hopefully) describe which GPUs are to be + supported by this driver and which are for panfrost. + +Co-developed-by: Steven Price +Signed-off-by: Steven Price +Signed-off-by: Boris Brezillon +Acked-by: Steven Price # MIT+GPL2 relicensing,Arm +Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro +Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-13-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + drivers/gpu/drm/Kconfig | 2 ++ + drivers/gpu/drm/Makefile | 1 + + drivers/gpu/drm/panthor/Kconfig | 23 +++++++++++++++++++++++ + drivers/gpu/drm/panthor/Makefile | 14 ++++++++++++++ + 4 files changed, 40 insertions(+) + create mode 100644 drivers/gpu/drm/panthor/Kconfig + create mode 100644 drivers/gpu/drm/panthor/Makefile + +diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig +index c7edba18a6f0..b94a2fe4f462 100644 +--- a/drivers/gpu/drm/Kconfig ++++ b/drivers/gpu/drm/Kconfig +@@ -384,6 +384,8 @@ source "drivers/gpu/drm/lima/Kconfig" + + source "drivers/gpu/drm/panfrost/Kconfig" + ++source "drivers/gpu/drm/panthor/Kconfig" ++ + source "drivers/gpu/drm/aspeed/Kconfig" + + source "drivers/gpu/drm/mcde/Kconfig" +diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile +index 104b42df2e95..6eb2b553a163 100644 +--- a/drivers/gpu/drm/Makefile ++++ b/drivers/gpu/drm/Makefile +@@ -179,6 +179,7 @@ obj-$(CONFIG_DRM_XEN) += xen/ + obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ + obj-$(CONFIG_DRM_LIMA) += lima/ + obj-$(CONFIG_DRM_PANFROST) += panfrost/ ++obj-$(CONFIG_DRM_PANTHOR) += panthor/ + obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ + obj-$(CONFIG_DRM_MCDE) += mcde/ + obj-$(CONFIG_DRM_TIDSS) += tidss/ +diff --git a/drivers/gpu/drm/panthor/Kconfig b/drivers/gpu/drm/panthor/Kconfig +new file mode 100644 +index 000000000000..55b40ad07f3b +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Kconfig +@@ -0,0 +1,23 @@ 
++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++config DRM_PANTHOR ++ tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)" ++ depends on DRM ++ depends on ARM || ARM64 || COMPILE_TEST ++ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE ++ depends on MMU ++ select DEVFREQ_GOV_SIMPLE_ONDEMAND ++ select DRM_EXEC ++ select DRM_GEM_SHMEM_HELPER ++ select DRM_GPUVM ++ select DRM_SCHED ++ select IOMMU_IO_PGTABLE_LPAE ++ select IOMMU_SUPPORT ++ select PM_DEVFREQ ++ help ++ DRM driver for ARM Mali CSF-based GPUs. ++ ++ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. ++ ++ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will ++ be supported with the panfrost driver as they are not CSF GPUs. +diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile +new file mode 100644 +index 000000000000..15294719b09c +--- /dev/null ++++ b/drivers/gpu/drm/panthor/Makefile +@@ -0,0 +1,14 @@ ++# SPDX-License-Identifier: GPL-2.0 or MIT ++ ++panthor-y := \ ++ panthor_devfreq.o \ ++ panthor_device.o \ ++ panthor_drv.o \ ++ panthor_fw.o \ ++ panthor_gem.o \ ++ panthor_gpu.o \ ++ panthor_heap.o \ ++ panthor_mmu.o \ ++ panthor_sched.o ++ ++obj-$(CONFIG_DRM_PANTHOR) += panthor.o +-- +2.42.0 + + +From ab2f352d20d3d53b0fa647e18a1abff9d2bf8493 Mon Sep 17 00:00:00 2001 +From: Liviu Dudau +Date: Thu, 29 Feb 2024 17:22:27 +0100 +Subject: [PATCH 15/71] [MERGED] dt-bindings: gpu: mali-valhall-csf: Add + support for Arm Mali CSF GPUs + +Arm has introduced a new v10 GPU architecture that replaces the Job Manager +interface with a new Command Stream Frontend. It adds firmware driven +command stream queues that can be used by kernel and user space to submit +jobs to the GPU. + +Add the initial schema for the device tree that is based on support for +RK3588 SoC. The minimum number of clocks is one for the IP, but on Rockchip +platforms they will tend to expose the semi-independent clocks for better +power management. + +v6: +- Add Maxime's and Heiko's acks + +v5: +- Move the opp-table node under the gpu node + +v4: +- Fix formatting issue + +v3: +- Cleanup commit message to remove redundant text +- Added opp-table property and re-ordered entries +- Clarified power-domains and power-domain-names requirements for RK3588. +- Cleaned up example + +Note: power-domains and power-domain-names requirements for other platforms +are still work in progress, hence the bindings are left incomplete here. 
+ +v2: +- New commit + +Signed-off-by: Liviu Dudau +Cc: Krzysztof Kozlowski +Cc: Rob Herring +Cc: Conor Dooley +Cc: devicetree@vger.kernel.org +Signed-off-by: Boris Brezillon +Reviewed-by: Rob Herring +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-14-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + .../bindings/gpu/arm,mali-valhall-csf.yaml | 147 ++++++++++++++++++ + 1 file changed, 147 insertions(+) + create mode 100644 Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml + +diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml +new file mode 100644 +index 000000000000..a5b4e0021758 +--- /dev/null ++++ b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml +@@ -0,0 +1,147 @@ ++# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/gpu/arm,mali-valhall-csf.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: ARM Mali Valhall GPU ++ ++maintainers: ++ - Liviu Dudau ++ - Boris Brezillon ++ ++properties: ++ $nodename: ++ pattern: '^gpu@[a-f0-9]+$' ++ ++ compatible: ++ oneOf: ++ - items: ++ - enum: ++ - rockchip,rk3588-mali ++ - const: arm,mali-valhall-csf # Mali Valhall GPU model/revision is fully discoverable ++ ++ reg: ++ maxItems: 1 ++ ++ interrupts: ++ items: ++ - description: Job interrupt ++ - description: MMU interrupt ++ - description: GPU interrupt ++ ++ interrupt-names: ++ items: ++ - const: job ++ - const: mmu ++ - const: gpu ++ ++ clocks: ++ minItems: 1 ++ maxItems: 3 ++ ++ clock-names: ++ minItems: 1 ++ items: ++ - const: core ++ - const: coregroup ++ - const: stacks ++ ++ mali-supply: true ++ ++ operating-points-v2: true ++ opp-table: ++ type: object ++ ++ power-domains: ++ minItems: 1 ++ maxItems: 5 ++ ++ power-domain-names: ++ minItems: 1 ++ maxItems: 5 ++ ++ sram-supply: true ++ ++ "#cooling-cells": ++ const: 2 ++ ++ dynamic-power-coefficient: ++ $ref: /schemas/types.yaml#/definitions/uint32 ++ description: ++ A u32 value that represents the running time dynamic ++ power coefficient in units of uW/MHz/V^2. The ++ coefficient can either be calculated from power ++ measurements or derived by analysis. ++ ++ The dynamic power consumption of the GPU is ++ proportional to the square of the Voltage (V) and ++ the clock frequency (f). The coefficient is used to ++ calculate the dynamic power as below - ++ ++ Pdyn = dynamic-power-coefficient * V^2 * f ++ ++ where voltage is in V, frequency is in MHz. 
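As a quick sanity check of the Pdyn formula described above, here is a worked example. The 300 MHz / 0.675 V operating point is taken from the example OPP table later in this binding; the coefficient value of 1000 uW/MHz/V^2 is purely illustrative, since the real coefficient is board/SoC specific and must come from power measurements or vendor analysis:

    Pdyn = 1000 uW/MHz/V^2 * (0.675 V)^2 * 300 MHz
         = 1000 * 0.455625 * 300 uW
         = 136,687.5 uW ~ 0.14 W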
++ ++ dma-coherent: true ++ ++required: ++ - compatible ++ - reg ++ - interrupts ++ - interrupt-names ++ - clocks ++ - mali-supply ++ ++additionalProperties: false ++ ++allOf: ++ - if: ++ properties: ++ compatible: ++ contains: ++ const: rockchip,rk3588-mali ++ then: ++ properties: ++ clocks: ++ minItems: 3 ++ power-domains: ++ maxItems: 1 ++ power-domain-names: false ++ ++examples: ++ - | ++ #include ++ #include ++ #include ++ #include ++ ++ gpu: gpu@fb000000 { ++ compatible = "rockchip,rk3588-mali", "arm,mali-valhall-csf"; ++ reg = <0xfb000000 0x200000>; ++ interrupts = , ++ , ++ ; ++ interrupt-names = "job", "mmu", "gpu"; ++ clock-names = "core", "coregroup", "stacks"; ++ clocks = <&cru CLK_GPU>, <&cru CLK_GPU_COREGROUP>, ++ <&cru CLK_GPU_STACKS>; ++ power-domains = <&power RK3588_PD_GPU>; ++ operating-points-v2 = <&gpu_opp_table>; ++ mali-supply = <&vdd_gpu_s0>; ++ sram-supply = <&vdd_gpu_mem_s0>; ++ ++ gpu_opp_table: opp-table { ++ compatible = "operating-points-v2"; ++ opp-300000000 { ++ opp-hz = /bits/ 64 <300000000>; ++ opp-microvolt = <675000 675000 850000>; ++ }; ++ opp-400000000 { ++ opp-hz = /bits/ 64 <400000000>; ++ opp-microvolt = <675000 675000 850000>; ++ }; ++ }; ++ }; ++ ++... +-- +2.42.0 + + +From c6aae1310691381755a207a206693299dd7f92e6 Mon Sep 17 00:00:00 2001 +From: Boris Brezillon +Date: Thu, 29 Feb 2024 17:22:28 +0100 +Subject: [PATCH 16/71] [MERGED] drm/panthor: Add an entry to MAINTAINERS + +Add an entry for the Panthor driver to the MAINTAINERS file. + +v6: +- Add Maxime's and Heiko's acks + +v4: +- Add Steve's R-b + +v3: +- Add bindings document as an 'F:' line. +- Add Steven and Liviu as co-maintainers. + +Signed-off-by: Boris Brezillon +Reviewed-by: Steven Price +Acked-by: Maxime Ripard +Acked-by: Heiko Stuebner +Link: https://lore.kernel.org/r/20240229162230.2634044-15-boris.brezillon@collabora.com +Signed-off-by: Sebastian Reichel +--- + MAINTAINERS | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/MAINTAINERS b/MAINTAINERS +index 4f298c4187fb..252fb777ba43 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -1669,6 +1669,17 @@ F: Documentation/gpu/panfrost.rst + F: drivers/gpu/drm/panfrost/ + F: include/uapi/drm/panfrost_drm.h + ++ARM MALI PANTHOR DRM DRIVER ++M: Boris Brezillon ++M: Steven Price ++M: Liviu Dudau ++L: dri-devel@lists.freedesktop.org ++S: Supported ++T: git git://anongit.freedesktop.org/drm/drm-misc ++F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml ++F: drivers/gpu/drm/panthor/ ++F: include/uapi/drm/panthor_drm.h ++ + ARM MALI-DP DRM DRIVER + M: Liviu Dudau + S: Supported +-- +2.42.0 + + +From 60a60db1353127e87cffbd5b9376b78837ab5f44 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Tue, 13 Feb 2024 17:32:36 +0100 +Subject: [PATCH 17/71] [MERGED] dt-bindings: soc: rockchip: add rk3588 USB3 + syscon + +RK3588 USB3 support requires the GRF for USB and USBDP PHY. 
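For context, a minimal sketch of how a devicetree could reference one of the two GRF compatibles added by this patch. The node label, unit address and register size are illustrative assumptions, not values taken from this series:

    usb_grf: syscon@fd5ac000 {
            /* "rockchip,rk3588-usb-grf" is one of the compatibles added below;
             * the unit address and reg size are example values only. */
            compatible = "rockchip,rk3588-usb-grf", "syscon";
            reg = <0x0 0xfd5ac000 0x0 0x4000>;
    };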
+ +Acked-by: Conor Dooley +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240213163609.44930-3-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +index 9793ea6f0fe6..b4712c4c7bca 100644 +--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml ++++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +@@ -28,6 +28,8 @@ properties: + - rockchip,rk3588-sys-grf + - rockchip,rk3588-pcie3-phy-grf + - rockchip,rk3588-pcie3-pipe-grf ++ - rockchip,rk3588-usb-grf ++ - rockchip,rk3588-usbdpphy-grf + - rockchip,rk3588-vo-grf + - rockchip,rk3588-vop-grf + - rockchip,rv1108-usbgrf +-- +2.42.0 + + +From e02f5c57b3037860c7deed555593a5fe0c4fe19a Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Tue, 13 Feb 2024 17:32:35 +0100 +Subject: [PATCH 18/71] [MERGED] dt-bindings: soc: rockchip: add clock to + RK3588 VO grf + +The RK3588 VO GRF needs a clock. This adds the clock to the allowed +properties, makes it mandatory for the RK3588 VO grf and disallows it +for any other Rockchip grf. + +Acked-by: Conor Dooley +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240213163609.44930-2-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + .../devicetree/bindings/soc/rockchip/grf.yaml | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +index b4712c4c7bca..12e7a78f7f6b 100644 +--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml ++++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +@@ -68,6 +68,9 @@ properties: + reg: + maxItems: 1 + ++ clocks: ++ maxItems: 1 ++ + "#address-cells": + const: 1 + +@@ -250,6 +253,22 @@ allOf: + + unevaluatedProperties: false + ++ - if: ++ properties: ++ compatible: ++ contains: ++ enum: ++ - rockchip,rk3588-vo-grf ++ ++ then: ++ required: ++ - clocks ++ ++ else: ++ properties: ++ clocks: false ++ ++ + examples: + - | + #include +-- +2.42.0 + + +From 574b9b71e862ed9b498ed4746f7999180820905c Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:22 +0100 +Subject: [PATCH 19/71] [MERGED] clk: rockchip: rk3588: fix CLK_NR_CLKS usage + +CLK_NR_CLKS is not part of the DT bindings and needs to be removed +from it, just like it recently happened for other platforms. This +takes care of it by introducing a new function identifying the +maximum used clock ID at runtime. 
+ +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-2-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + drivers/clk/rockchip/clk-rk3588.c | 5 ++++- + drivers/clk/rockchip/clk.c | 17 +++++++++++++++++ + drivers/clk/rockchip/clk.h | 2 ++ + 3 files changed, 23 insertions(+), 1 deletion(-) + +diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c +index 6994165e0395..0b60ae78f9d8 100644 +--- a/drivers/clk/rockchip/clk-rk3588.c ++++ b/drivers/clk/rockchip/clk-rk3588.c +@@ -2458,15 +2458,18 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + static void __init rk3588_clk_init(struct device_node *np) + { + struct rockchip_clk_provider *ctx; ++ unsigned long clk_nr_clks; + void __iomem *reg_base; + ++ clk_nr_clks = rockchip_clk_find_max_clk_id(rk3588_clk_branches, ++ ARRAY_SIZE(rk3588_clk_branches)) + 1; + reg_base = of_iomap(np, 0); + if (!reg_base) { + pr_err("%s: could not map cru region\n", __func__); + return; + } + +- ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); ++ ctx = rockchip_clk_init(np, reg_base, clk_nr_clks); + if (IS_ERR(ctx)) { + pr_err("%s: rockchip clk init failed\n", __func__); + iounmap(reg_base); +diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c +index 4059d9365ae6..73d2cbdc716b 100644 +--- a/drivers/clk/rockchip/clk.c ++++ b/drivers/clk/rockchip/clk.c +@@ -429,6 +429,23 @@ void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, + } + EXPORT_SYMBOL_GPL(rockchip_clk_register_plls); + ++unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list, ++ unsigned int nr_clk) ++{ ++ unsigned long max = 0; ++ unsigned int idx; ++ ++ for (idx = 0; idx < nr_clk; idx++, list++) { ++ if (list->id > max) ++ max = list->id; ++ if (list->child && list->child->id > max) ++ max = list->id; ++ } ++ ++ return max; ++} ++EXPORT_SYMBOL_GPL(rockchip_clk_find_max_clk_id); ++ + void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, + struct rockchip_clk_branch *list, + unsigned int nr_clk) +diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h +index 758ebaf2236b..fd3b476dedda 100644 +--- a/drivers/clk/rockchip/clk.h ++++ b/drivers/clk/rockchip/clk.h +@@ -973,6 +973,8 @@ struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, + void __iomem *base, unsigned long nr_clks); + void rockchip_clk_of_add_provider(struct device_node *np, + struct rockchip_clk_provider *ctx); ++unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list, ++ unsigned int nr_clk); + void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, + struct rockchip_clk_branch *list, + unsigned int nr_clk); +-- +2.42.0 + + +From 8870f8250105e788b4051b29d96338bf2b077abf Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:23 +0100 +Subject: [PATCH 20/71] [MERGED] dt-bindings: clock: rk3588: drop CLK_NR_CLKS + +CLK_NR_CLKS should not be part of the binding. Let's drop it, since +the kernel code no longer uses it either. 
+ +Reviewed-by: Krzysztof Kozlowski +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-3-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + include/dt-bindings/clock/rockchip,rk3588-cru.h | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/include/dt-bindings/clock/rockchip,rk3588-cru.h b/include/dt-bindings/clock/rockchip,rk3588-cru.h +index 5790b1391201..7c6f0ec7c979 100644 +--- a/include/dt-bindings/clock/rockchip,rk3588-cru.h ++++ b/include/dt-bindings/clock/rockchip,rk3588-cru.h +@@ -734,8 +734,6 @@ + #define PCLK_AV1_PRE 719 + #define HCLK_SDIO_PRE 720 + +-#define CLK_NR_CLKS (HCLK_SDIO_PRE + 1) +- + /* scmi-clocks indices */ + + #define SCMI_CLK_CPUL 0 +-- +2.42.0 + + +From 7711e828a40f44f43d972220ffc043e5533112b4 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:24 +0100 +Subject: [PATCH 21/71] [MERGED] dt-bindings: clock: rk3588: add missing + PCLK_VO1GRF + +Add PCLK_VO1GRF to complement PCLK_VO0GRF. This will be needed +for HDMI support. + +Acked-by: Krzysztof Kozlowski +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-4-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + include/dt-bindings/clock/rockchip,rk3588-cru.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/include/dt-bindings/clock/rockchip,rk3588-cru.h b/include/dt-bindings/clock/rockchip,rk3588-cru.h +index 7c6f0ec7c979..0c7d3ca2d5bc 100644 +--- a/include/dt-bindings/clock/rockchip,rk3588-cru.h ++++ b/include/dt-bindings/clock/rockchip,rk3588-cru.h +@@ -733,6 +733,7 @@ + #define ACLK_AV1_PRE 718 + #define PCLK_AV1_PRE 719 + #define HCLK_SDIO_PRE 720 ++#define PCLK_VO1GRF 721 + + /* scmi-clocks indices */ + +-- +2.42.0 + + +From 3cf6f7edca73bb7e491a84cba7f84c079cee8888 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:25 +0100 +Subject: [PATCH 22/71] [MERGED] clk: rockchip: rk3588: fix pclk_vo0grf and + pclk_vo1grf + +Currently pclk_vo1grf is not exposed, but it should be referenced +from the vo1_grf syscon, which needs it enabled. That syscon is +required for HDMI RX and TX functionality among other things. + +Apart from that pclk_vo0grf and pclk_vo1grf are both linked gates +and need the VO's hclk enabled in addition to their parent clock. + +No Fixes tag has been added, since the logic requiring these clocks +is not yet upstream anyways. 
+ +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-5-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + drivers/clk/rockchip/clk-rk3588.c | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c +index 0b60ae78f9d8..26330d655159 100644 +--- a/drivers/clk/rockchip/clk-rk3588.c ++++ b/drivers/clk/rockchip/clk-rk3588.c +@@ -1851,8 +1851,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + RK3588_CLKGATE_CON(56), 0, GFLAGS), + GATE(PCLK_TRNG0, "pclk_trng0", "pclk_vo0_root", 0, + RK3588_CLKGATE_CON(56), 1, GFLAGS), +- GATE(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", CLK_IGNORE_UNUSED, +- RK3588_CLKGATE_CON(55), 10, GFLAGS), + COMPOSITE(CLK_I2S4_8CH_TX_SRC, "clk_i2s4_8ch_tx_src", gpll_aupll_p, 0, + RK3588_CLKSEL_CON(118), 5, 1, MFLAGS, 0, 5, DFLAGS, + RK3588_CLKGATE_CON(56), 11, GFLAGS), +@@ -1998,8 +1996,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + RK3588_CLKGATE_CON(60), 9, GFLAGS), + GATE(PCLK_TRNG1, "pclk_trng1", "pclk_vo1_root", 0, + RK3588_CLKGATE_CON(60), 10, GFLAGS), +- GATE(0, "pclk_vo1grf", "pclk_vo1_root", CLK_IGNORE_UNUSED, +- RK3588_CLKGATE_CON(59), 12, GFLAGS), + GATE(PCLK_S_EDP0, "pclk_s_edp0", "pclk_vo1_s_root", 0, + RK3588_CLKGATE_CON(59), 14, GFLAGS), + GATE(PCLK_S_EDP1, "pclk_s_edp1", "pclk_vo1_s_root", 0, +@@ -2447,12 +2443,14 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), + GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), + GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", "aclk_vop_low_root", 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), +- GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", 0, RK3588_CLKGATE_CON(55), 5, GFLAGS), ++ GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), + GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), +- GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 9, GFLAGS), ++ GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), + GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 1, GFLAGS), + GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), + GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", "hclk_nvm", 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), ++ GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", "hclk_vo0", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), ++ GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", "hclk_vo1", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), + }; + + static void __init rk3588_clk_init(struct device_node *np) +-- +2.42.0 + + +From 46d9a9dc8d73877695664f94582700e0a09df56e Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:26 +0100 +Subject: [PATCH 23/71] [MERGED] clk: rockchip: rk3588: fix indent + +pclk_mailbox2 is the only RK3588 clock indented with one tab instead of +two tabs. Let's fix this. 
+ +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-6-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + drivers/clk/rockchip/clk-rk3588.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c +index 26330d655159..2e8bdd93c625 100644 +--- a/drivers/clk/rockchip/clk-rk3588.c ++++ b/drivers/clk/rockchip/clk-rk3588.c +@@ -1004,7 +1004,7 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + GATE(PCLK_MAILBOX1, "pclk_mailbox1", "pclk_top_root", 0, + RK3588_CLKGATE_CON(16), 12, GFLAGS), + GATE(PCLK_MAILBOX2, "pclk_mailbox2", "pclk_top_root", 0, +- RK3588_CLKGATE_CON(16), 13, GFLAGS), ++ RK3588_CLKGATE_CON(16), 13, GFLAGS), + GATE(PCLK_PMU2, "pclk_pmu2", "pclk_top_root", CLK_IS_CRITICAL, + RK3588_CLKGATE_CON(19), 3, GFLAGS), + GATE(PCLK_PMUCM0_INTMUX, "pclk_pmucm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, +-- +2.42.0 + + +From 3f9015c18ad28d067606ab9ff5985f0446919980 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Fri, 26 Jan 2024 19:18:27 +0100 +Subject: [PATCH 24/71] [MERGED] clk: rockchip: rk3588: use linked clock ID for + GATE_LINK + +In preparation for properly supporting GATE_LINK switch the unused +linked clock argument from the clock's name to its ID. This allows +easy and fast lookup of the 'struct clk'. + +Signed-off-by: Sebastian Reichel +Link: https://lore.kernel.org/r/20240126182919.48402-7-sebastian.reichel@collabora.com +Signed-off-by: Heiko Stuebner +--- + drivers/clk/rockchip/clk-rk3588.c | 46 +++++++++++++++---------------- + 1 file changed, 23 insertions(+), 23 deletions(-) + +diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c +index 2e8bdd93c625..b30279a96dc8 100644 +--- a/drivers/clk/rockchip/clk-rk3588.c ++++ b/drivers/clk/rockchip/clk-rk3588.c +@@ -29,7 +29,7 @@ + * power, but avoids leaking implementation details into DT or hanging the + * system. 
+ */ +-#define GATE_LINK(_id, cname, pname, linkname, f, o, b, gf) \ ++#define GATE_LINK(_id, cname, pname, linkedclk, f, o, b, gf) \ + GATE(_id, cname, pname, f, o, b, gf) + #define RK3588_LINKED_CLK CLK_IS_CRITICAL + +@@ -2429,28 +2429,28 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { + GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0, + RK3588_CLKGATE_CON(68), 2, GFLAGS), + +- GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", "aclk_vi_root", 0, RK3588_CLKGATE_CON(26), 6, GFLAGS), +- GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", "hclk_vi_root", 0, RK3588_CLKGATE_CON(26), 8, GFLAGS), +- GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", "aclk_nvm_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS), +- GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 2, GFLAGS), +- GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 3, GFLAGS), +- GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 7, GFLAGS), +- GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 5, GFLAGS), +- GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", "aclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 3, GFLAGS), +- GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", "hclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 2, GFLAGS), +- GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 5, GFLAGS), +- GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 6, GFLAGS), +- GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), +- GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), +- GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", "aclk_vop_low_root", 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), +- GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), +- GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), +- GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), +- GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 1, GFLAGS), +- GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), +- GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", "hclk_nvm", 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), +- GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", "hclk_vo0", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), +- GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", "hclk_vo1", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), ++ GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", ACLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 6, GFLAGS), ++ GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", HCLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 8, GFLAGS), ++ GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS), ++ GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", 
ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 2, GFLAGS), ++ GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", HCLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 3, GFLAGS), ++ GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 7, GFLAGS), ++ GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 5, GFLAGS), ++ GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", ACLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 3, GFLAGS), ++ GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", HCLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 2, GFLAGS), ++ GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 5, GFLAGS), ++ GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 6, GFLAGS), ++ GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), ++ GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), ++ GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", ACLK_VOP_LOW_ROOT, 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), ++ GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", HCLK_VOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), ++ GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), ++ GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", HCLK_VO1USB_TOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), ++ GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 1, GFLAGS), ++ GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), ++ GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", HCLK_NVM, 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), ++ GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", HCLK_VO0, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), ++ GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", HCLK_VO1, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), + }; + + static void __init rk3588_clk_init(struct device_node *np) +-- +2.42.0 + + +From b84379d92402ffb2deca4b0390f210cbcc52dec0 Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Fri, 19 Jan 2024 21:38:01 +0200 +Subject: [PATCH 25/71] [MERGED] dt-bindings: soc: rockchip: Add rk3588 + hdptxphy syscon + +Add compatible for the hdptxphy GRF used by rk3588-hdptx-phy. 
+ +Signed-off-by: Cristian Ciocaltea +Acked-by: Krzysztof Kozlowski +Link: https://lore.kernel.org/r/20240119193806.1030214-2-cristian.ciocaltea@collabora.com +Signed-off-by: Heiko Stuebner +--- + Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +index 12e7a78f7f6b..0b87c266760c 100644 +--- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml ++++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml +@@ -22,6 +22,7 @@ properties: + - rockchip,rk3568-usb2phy-grf + - rockchip,rk3588-bigcore0-grf + - rockchip,rk3588-bigcore1-grf ++ - rockchip,rk3588-hdptxphy-grf + - rockchip,rk3588-ioc + - rockchip,rk3588-php-grf + - rockchip,rk3588-pipe-phy-grf +-- +2.42.0 + + +From c6862f1e481f3a07f4a4541fa39f07674cd0d8df Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Wed, 14 Feb 2024 13:45:36 +0200 +Subject: [PATCH 26/71] [MERGED] dt-bindings: phy: Add Rockchip HDMI/eDP Combo + PHY schema + +Add dt-binding schema for the HDMI/eDP Transmitter Combo PHY found on +Rockchip RK3588 SoC. + +Reviewed-by: Krzysztof Kozlowski +Reviewed-by: Heiko Stuebner +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240214-phy-hdptx-v4-1-e7974f46c1a7@collabora.com +Signed-off-by: Vinod Koul +--- + .../phy/rockchip,rk3588-hdptx-phy.yaml | 91 +++++++++++++++++++ + 1 file changed, 91 insertions(+) + create mode 100644 Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml + +diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml +new file mode 100644 +index 000000000000..54e822c715f3 +--- /dev/null ++++ b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml +@@ -0,0 +1,91 @@ ++# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) ++%YAML 1.2 ++--- ++$id: http://devicetree.org/schemas/phy/rockchip,rk3588-hdptx-phy.yaml# ++$schema: http://devicetree.org/meta-schemas/core.yaml# ++ ++title: Rockchip SoC HDMI/eDP Transmitter Combo PHY ++ ++maintainers: ++ - Cristian Ciocaltea ++ ++properties: ++ compatible: ++ enum: ++ - rockchip,rk3588-hdptx-phy ++ ++ reg: ++ maxItems: 1 ++ ++ clocks: ++ items: ++ - description: Reference clock ++ - description: APB clock ++ ++ clock-names: ++ items: ++ - const: ref ++ - const: apb ++ ++ "#phy-cells": ++ const: 0 ++ ++ resets: ++ items: ++ - description: PHY reset line ++ - description: APB reset line ++ - description: INIT reset line ++ - description: CMN reset line ++ - description: LANE reset line ++ - description: ROPLL reset line ++ - description: LCPLL reset line ++ ++ reset-names: ++ items: ++ - const: phy ++ - const: apb ++ - const: init ++ - const: cmn ++ - const: lane ++ - const: ropll ++ - const: lcpll ++ ++ rockchip,grf: ++ $ref: /schemas/types.yaml#/definitions/phandle ++ description: Some PHY related data is accessed through GRF regs. 
++ ++required: ++ - compatible ++ - reg ++ - clocks ++ - clock-names ++ - "#phy-cells" ++ - resets ++ - reset-names ++ - rockchip,grf ++ ++additionalProperties: false ++ ++examples: ++ - | ++ #include ++ #include ++ ++ soc { ++ #address-cells = <2>; ++ #size-cells = <2>; ++ ++ phy@fed60000 { ++ compatible = "rockchip,rk3588-hdptx-phy"; ++ reg = <0x0 0xfed60000 0x0 0x2000>; ++ clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>; ++ clock-names = "ref", "apb"; ++ #phy-cells = <0>; ++ resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>, ++ <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>, ++ <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>, ++ <&cru SRST_HDPTX0_LCPLL>; ++ reset-names = "phy", "apb", "init", "cmn", "lane", "ropll", "lcpll"; ++ rockchip,grf = <&hdptxphy_grf>; ++ }; ++ }; +-- +2.42.0 + + +From 479930e8dfba3b8ff66ec70c2a42b235cea386f7 Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Mon, 19 Feb 2024 22:37:24 +0200 +Subject: [PATCH 27/71] [MERGED] arm64: defconfig: Enable Rockchip HDMI/eDP + Combo PHY + +Enable support for the Rockchip HDMI/eDP Combo PHY, which is based on a +Samsung IP block. This is used by the RK3588 SoC family. + +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240219203725.283532-1-cristian.ciocaltea@collabora.com +Signed-off-by: Heiko Stuebner +--- + arch/arm64/configs/defconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig +index e6cf3e5d63c3..134dce860641 100644 +--- a/arch/arm64/configs/defconfig ++++ b/arch/arm64/configs/defconfig +@@ -1490,6 +1490,7 @@ CONFIG_PHY_ROCKCHIP_INNO_USB2=y + CONFIG_PHY_ROCKCHIP_INNO_DSIDPHY=m + CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY=m + CONFIG_PHY_ROCKCHIP_PCIE=m ++CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX=m + CONFIG_PHY_ROCKCHIP_SNPS_PCIE3=y + CONFIG_PHY_ROCKCHIP_TYPEC=y + CONFIG_PHY_SAMSUNG_UFS=y +-- +2.42.0 + + +From fc60dd1408ceaf8c4f88e684ee8d7b880e0bdb8a Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Mon, 19 Feb 2024 22:46:25 +0200 +Subject: [PATCH 28/71] [MERGED] arm64: dts: rockchip: Add HDMI0 PHY to rk3588 + +Add DT nodes for HDMI0 PHY and related syscon found on RK3588 SoC. 
+ +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240219204626.284399-1-cristian.ciocaltea@collabora.com +Signed-off-by: Heiko Stuebner +--- + arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +index 36b1b7acfe6a..3a15a30543c3 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi ++++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +@@ -586,6 +586,11 @@ u2phy3_host: host-port { + }; + }; + ++ hdptxphy0_grf: syscon@fd5e0000 { ++ compatible = "rockchip,rk3588-hdptxphy-grf", "syscon"; ++ reg = <0x0 0xfd5e0000 0x0 0x100>; ++ }; ++ + ioc: syscon@fd5f0000 { + compatible = "rockchip,rk3588-ioc", "syscon"; + reg = <0x0 0xfd5f0000 0x0 0x10000>; +@@ -2360,6 +2365,22 @@ dmac2: dma-controller@fed10000 { + #dma-cells = <1>; + }; + ++ hdptxphy_hdmi0: phy@fed60000 { ++ compatible = "rockchip,rk3588-hdptx-phy"; ++ reg = <0x0 0xfed60000 0x0 0x2000>; ++ clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>; ++ clock-names = "ref", "apb"; ++ #phy-cells = <0>; ++ resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>, ++ <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>, ++ <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>, ++ <&cru SRST_HDPTX0_LCPLL>; ++ reset-names = "phy", "apb", "init", "cmn", "lane", "ropll", ++ "lcpll"; ++ rockchip,grf = <&hdptxphy0_grf>; ++ status = "disabled"; ++ }; ++ + combphy0_ps: phy@fee00000 { + compatible = "rockchip,rk3588-naneng-combphy"; + reg = <0x0 0xfee00000 0x0 0x100>; +-- +2.42.0 + + +From 31151de30e886f86489560d96175707d8d6c814d Mon Sep 17 00:00:00 2001 +From: Cristian Ciocaltea +Date: Wed, 14 Feb 2024 13:45:37 +0200 +Subject: [PATCH 29/71] [MERGED] phy: rockchip: Add Samsung HDMI/eDP Combo PHY + driver + +Add driver for the HDMI/eDP TX Combo PHY found on Rockchip RK3588 SoC. + +The PHY is based on a Samsung IP block and supports HDMI 2.1 TMDS, FRL +and eDP links. The maximum data rate is 12Gbps (FRL), while the minimum +is 250Mbps (TMDS). + +Only the TMDS link is currently supported. + +Co-developed-by: Algea Cao +Signed-off-by: Algea Cao +Tested-by: Heiko Stuebner +Signed-off-by: Cristian Ciocaltea +Link: https://lore.kernel.org/r/20240214-phy-hdptx-v4-2-e7974f46c1a7@collabora.com +Signed-off-by: Vinod Koul +--- + drivers/phy/rockchip/Kconfig | 8 + + drivers/phy/rockchip/Makefile | 1 + + .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 1028 +++++++++++++++++ + 3 files changed, 1037 insertions(+) + create mode 100644 drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c + +diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig +index 94360fc96a6f..a34f67bb7e61 100644 +--- a/drivers/phy/rockchip/Kconfig ++++ b/drivers/phy/rockchip/Kconfig +@@ -83,6 +83,14 @@ config PHY_ROCKCHIP_PCIE + help + Enable this to support the Rockchip PCIe PHY. + ++config PHY_ROCKCHIP_SAMSUNG_HDPTX ++ tristate "Rockchip Samsung HDMI/eDP Combo PHY driver" ++ depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF ++ select GENERIC_PHY ++ help ++ Enable this to support the Rockchip HDMI/eDP Combo PHY ++ with Samsung IP block. 
++ + config PHY_ROCKCHIP_SNPS_PCIE3 + tristate "Rockchip Snps PCIe3 PHY Driver" + depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST +diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile +index 7eab129230d1..3d911304e654 100644 +--- a/drivers/phy/rockchip/Makefile ++++ b/drivers/phy/rockchip/Makefile +@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI) += phy-rockchip-inno-hdmi.o + obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2) += phy-rockchip-inno-usb2.o + obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY) += phy-rockchip-naneng-combphy.o + obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o ++obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o + obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o + obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o + obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o +diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +new file mode 100644 +index 000000000000..946c01210ac8 +--- /dev/null ++++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +@@ -0,0 +1,1028 @@ ++// SPDX-License-Identifier: GPL-2.0+ ++/* ++ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd. ++ * Copyright (c) 2024 Collabora Ltd. ++ * ++ * Author: Algea Cao ++ * Author: Cristian Ciocaltea ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define GRF_HDPTX_CON0 0x00 ++#define HDPTX_I_PLL_EN BIT(7) ++#define HDPTX_I_BIAS_EN BIT(6) ++#define HDPTX_I_BGR_EN BIT(5) ++#define GRF_HDPTX_STATUS 0x80 ++#define HDPTX_O_PLL_LOCK_DONE BIT(3) ++#define HDPTX_O_PHY_CLK_RDY BIT(2) ++#define HDPTX_O_PHY_RDY BIT(1) ++#define HDPTX_O_SB_RDY BIT(0) ++ ++#define HDTPX_REG(_n, _min, _max) \ ++ ( \ ++ BUILD_BUG_ON_ZERO((0x##_n) < (0x##_min)) + \ ++ BUILD_BUG_ON_ZERO((0x##_n) > (0x##_max)) + \ ++ ((0x##_n) * 4) \ ++ ) ++ ++#define CMN_REG(n) HDTPX_REG(n, 0000, 00a7) ++#define SB_REG(n) HDTPX_REG(n, 0100, 0129) ++#define LNTOP_REG(n) HDTPX_REG(n, 0200, 0229) ++#define LANE_REG(n) HDTPX_REG(n, 0300, 062d) ++ ++/* CMN_REG(0008) */ ++#define LCPLL_EN_MASK BIT(6) ++#define LCPLL_LCVCO_MODE_EN_MASK BIT(4) ++/* CMN_REG(001e) */ ++#define LCPLL_PI_EN_MASK BIT(5) ++#define LCPLL_100M_CLK_EN_MASK BIT(0) ++/* CMN_REG(0025) */ ++#define LCPLL_PMS_IQDIV_RSTN BIT(4) ++/* CMN_REG(0028) */ ++#define LCPLL_SDC_FRAC_EN BIT(2) ++#define LCPLL_SDC_FRAC_RSTN BIT(0) ++/* CMN_REG(002d) */ ++#define LCPLL_SDC_N_MASK GENMASK(3, 1) ++/* CMN_REG(002e) */ ++#define LCPLL_SDC_NUMBERATOR_MASK GENMASK(5, 0) ++/* CMN_REG(002f) */ ++#define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) ++#define LCPLL_SDC_NDIV_RSTN BIT(0) ++/* CMN_REG(003d) */ ++#define ROPLL_LCVCO_EN BIT(4) ++/* CMN_REG(004e) */ ++#define ROPLL_PI_EN BIT(5) ++/* CMN_REG(005c) */ ++#define ROPLL_PMS_IQDIV_RSTN BIT(5) ++/* CMN_REG(005e) */ ++#define ROPLL_SDM_EN_MASK BIT(6) ++#define ROPLL_SDM_FRAC_EN_RBR BIT(3) ++#define ROPLL_SDM_FRAC_EN_HBR BIT(2) ++#define ROPLL_SDM_FRAC_EN_HBR2 BIT(1) ++#define ROPLL_SDM_FRAC_EN_HBR3 BIT(0) ++/* CMN_REG(0064) */ ++#define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) ++/* CMN_REG(0069) */ ++#define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) ++/* CMN_REG(0074) */ ++#define ROPLL_SDC_NDIV_RSTN BIT(2) ++#define ROPLL_SSC_EN BIT(0) ++/* CMN_REG(0081) */ ++#define OVRD_PLL_CD_CLK_EN BIT(8) ++#define PLL_CD_HSCLK_EAST_EN BIT(0) ++/* CMN_REG(0086) */ ++#define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) ++#define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) ++#define 
PLL_PCG_CLK_EN BIT(0) ++/* CMN_REG(0087) */ ++#define PLL_FRL_MODE_EN BIT(3) ++#define PLL_TX_HS_CLK_EN BIT(2) ++/* CMN_REG(0089) */ ++#define LCPLL_ALONE_MODE BIT(1) ++/* CMN_REG(0097) */ ++#define DIG_CLK_SEL BIT(1) ++#define ROPLL_REF BIT(1) ++#define LCPLL_REF 0 ++/* CMN_REG(0099) */ ++#define CMN_ROPLL_ALONE_MODE BIT(2) ++#define ROPLL_ALONE_MODE BIT(2) ++/* CMN_REG(009a) */ ++#define HS_SPEED_SEL BIT(0) ++#define DIV_10_CLOCK BIT(0) ++/* CMN_REG(009b) */ ++#define IS_SPEED_SEL BIT(4) ++#define LINK_SYMBOL_CLOCK BIT(4) ++#define LINK_SYMBOL_CLOCK1_2 0 ++ ++/* SB_REG(0102) */ ++#define OVRD_SB_RXTERM_EN_MASK BIT(5) ++#define SB_RXTERM_EN_MASK BIT(4) ++#define ANA_SB_RXTERM_OFFSP_MASK GENMASK(3, 0) ++/* SB_REG(0103) */ ++#define ANA_SB_RXTERM_OFFSN_MASK GENMASK(6, 3) ++#define OVRD_SB_RX_RESCAL_DONE_MASK BIT(1) ++#define SB_RX_RESCAL_DONE_MASK BIT(0) ++/* SB_REG(0104) */ ++#define OVRD_SB_EN_MASK BIT(5) ++#define SB_EN_MASK BIT(4) ++/* SB_REG(0105) */ ++#define OVRD_SB_EARC_CMDC_EN_MASK BIT(6) ++#define SB_EARC_CMDC_EN_MASK BIT(5) ++#define ANA_SB_TX_HLVL_PROG_MASK GENMASK(2, 0) ++/* SB_REG(0106) */ ++#define ANA_SB_TX_LLVL_PROG_MASK GENMASK(6, 4) ++/* SB_REG(0109) */ ++#define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0) ++/* SB_REG(010f) */ ++#define OVRD_SB_VREG_EN_MASK BIT(7) ++#define SB_VREG_EN_MASK BIT(6) ++#define OVRD_SB_VREG_LPF_BYPASS_MASK BIT(5) ++#define SB_VREG_LPF_BYPASS_MASK BIT(4) ++#define ANA_SB_VREG_GAIN_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0110) */ ++#define ANA_SB_VREG_REF_SEL_MASK BIT(0) ++/* SB_REG(0113) */ ++#define SB_RX_RCAL_OPT_CODE_MASK GENMASK(5, 4) ++#define SB_RX_RTERM_CTRL_MASK GENMASK(3, 0) ++/* SB_REG(0114) */ ++#define SB_TG_SB_EN_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_RXTERM_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0115) */ ++#define SB_READY_DELAY_TIME_MASK GENMASK(5, 3) ++#define SB_TG_OSC_EN_DELAY_TIME_MASK GENMASK(2, 0) ++/* SB_REG(0116) */ ++#define AFC_RSTN_DELAY_TIME_MASK GENMASK(6, 4) ++/* SB_REG(0117) */ ++#define FAST_PULSE_TIME_MASK GENMASK(3, 0) ++/* SB_REG(011b) */ ++#define SB_EARC_SIG_DET_BYPASS_MASK BIT(4) ++#define SB_AFC_TOL_MASK GENMASK(3, 0) ++/* SB_REG(011f) */ ++#define SB_PWM_AFC_CTRL_MASK GENMASK(7, 2) ++#define SB_RCAL_RSTN_MASK BIT(1) ++/* SB_REG(0120) */ ++#define SB_EARC_EN_MASK BIT(1) ++#define SB_EARC_AFC_EN_MASK BIT(2) ++/* SB_REG(0123) */ ++#define OVRD_SB_READY_MASK BIT(5) ++#define SB_READY_MASK BIT(4) ++ ++/* LNTOP_REG(0200) */ ++#define PROTOCOL_SEL BIT(2) ++#define HDMI_MODE BIT(2) ++#define HDMI_TMDS_FRL_SEL BIT(1) ++/* LNTOP_REG(0206) */ ++#define DATA_BUS_SEL BIT(0) ++#define DATA_BUS_36_40 BIT(0) ++/* LNTOP_REG(0207) */ ++#define LANE_EN 0xf ++#define ALL_LANE_EN 0xf ++ ++/* LANE_REG(0312) */ ++#define LN0_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN0_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN0_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN0_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0412) */ ++#define LN1_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN1_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN1_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN1_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0512) */ ++#define LN2_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN2_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN2_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN2_TX_SER_RATE_SEL_HBR3 BIT(2) ++/* LANE_REG(0612) */ ++#define LN3_TX_SER_RATE_SEL_RBR BIT(5) ++#define LN3_TX_SER_RATE_SEL_HBR BIT(4) ++#define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) ++#define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) ++ ++struct lcpll_config { ++ u32 bit_rate; ++ u8 lcvco_mode_en; ++ u8 pi_en; ++ u8 
clk_en_100m; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pi_cdiv_rstn; ++ u8 pi_cdiv_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_n2; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++struct ropll_config { ++ u32 bit_rate; ++ u8 pms_mdiv; ++ u8 pms_mdiv_afc; ++ u8 pms_pdiv; ++ u8 pms_refdiv; ++ u8 pms_sdiv; ++ u8 pms_iqdiv_rstn; ++ u8 ref_clk_sel; ++ u8 sdm_en; ++ u8 sdm_rstn; ++ u8 sdc_frac_en; ++ u8 sdc_rstn; ++ u8 sdm_clk_div; ++ u8 sdm_deno; ++ u8 sdm_num_sign; ++ u8 sdm_num; ++ u8 sdc_n; ++ u8 sdc_num; ++ u8 sdc_deno; ++ u8 sdc_ndiv_rstn; ++ u8 ssc_en; ++ u8 ssc_fm_dev; ++ u8 ssc_fm_freq; ++ u8 ssc_clk_div_sel; ++ u8 ana_cpp_ctrl; ++ u8 ana_lpf_c_sel; ++ u8 cd_tx_ser_rate_sel; ++}; ++ ++enum rk_hdptx_reset { ++ RST_PHY = 0, ++ RST_APB, ++ RST_INIT, ++ RST_CMN, ++ RST_LANE, ++ RST_ROPLL, ++ RST_LCPLL, ++ RST_MAX ++}; ++ ++struct rk_hdptx_phy { ++ struct device *dev; ++ struct regmap *regmap; ++ struct regmap *grf; ++ ++ struct phy *phy; ++ struct phy_config *phy_cfg; ++ struct clk_bulk_data *clks; ++ int nr_clks; ++ struct reset_control_bulk_data rsts[RST_MAX]; ++}; ++ ++static const struct ropll_config ropll_tmds_cfg[] = { ++ { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5, ++ 0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, ++ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, ++ 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, ++ 0x14, 0x18, 
1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++ { 251750, 84, 84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1, ++ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0009), 0x0c), ++ REG_SEQ0(CMN_REG(000a), 0x83), ++ REG_SEQ0(CMN_REG(000b), 0x06), ++ REG_SEQ0(CMN_REG(000c), 0x20), ++ REG_SEQ0(CMN_REG(000d), 0xb8), ++ REG_SEQ0(CMN_REG(000e), 0x0f), ++ REG_SEQ0(CMN_REG(000f), 0x0f), ++ REG_SEQ0(CMN_REG(0010), 0x04), ++ REG_SEQ0(CMN_REG(0011), 0x00), ++ REG_SEQ0(CMN_REG(0012), 0x26), ++ REG_SEQ0(CMN_REG(0013), 0x22), ++ REG_SEQ0(CMN_REG(0014), 0x24), ++ REG_SEQ0(CMN_REG(0015), 0x77), ++ REG_SEQ0(CMN_REG(0016), 0x08), ++ REG_SEQ0(CMN_REG(0017), 0x00), ++ REG_SEQ0(CMN_REG(0018), 0x04), ++ REG_SEQ0(CMN_REG(0019), 0x48), ++ REG_SEQ0(CMN_REG(001a), 0x01), ++ REG_SEQ0(CMN_REG(001b), 0x00), ++ REG_SEQ0(CMN_REG(001c), 0x01), ++ REG_SEQ0(CMN_REG(001d), 0x64), ++ REG_SEQ0(CMN_REG(001f), 0x00), ++ REG_SEQ0(CMN_REG(0026), 0x53), ++ REG_SEQ0(CMN_REG(0029), 0x01), ++ REG_SEQ0(CMN_REG(0030), 0x00), ++ REG_SEQ0(CMN_REG(0031), 0x20), ++ REG_SEQ0(CMN_REG(0032), 0x30), ++ REG_SEQ0(CMN_REG(0033), 0x0b), ++ REG_SEQ0(CMN_REG(0034), 0x23), ++ REG_SEQ0(CMN_REG(0035), 0x00), ++ REG_SEQ0(CMN_REG(0038), 0x00), ++ REG_SEQ0(CMN_REG(0039), 0x00), ++ REG_SEQ0(CMN_REG(003a), 0x00), ++ REG_SEQ0(CMN_REG(003b), 0x00), ++ REG_SEQ0(CMN_REG(003c), 0x80), ++ REG_SEQ0(CMN_REG(003e), 0x0c), ++ REG_SEQ0(CMN_REG(003f), 0x83), ++ REG_SEQ0(CMN_REG(0040), 0x06), ++ REG_SEQ0(CMN_REG(0041), 0x20), ++ REG_SEQ0(CMN_REG(0042), 0xb8), ++ REG_SEQ0(CMN_REG(0043), 0x00), ++ REG_SEQ0(CMN_REG(0044), 0x46), ++ REG_SEQ0(CMN_REG(0045), 0x24), ++ REG_SEQ0(CMN_REG(0046), 0xff), ++ REG_SEQ0(CMN_REG(0047), 0x00), ++ REG_SEQ0(CMN_REG(0048), 0x44), ++ REG_SEQ0(CMN_REG(0049), 0xfa), ++ REG_SEQ0(CMN_REG(004a), 0x08), ++ REG_SEQ0(CMN_REG(004b), 0x00), ++ REG_SEQ0(CMN_REG(004c), 0x01), ++ REG_SEQ0(CMN_REG(004d), 0x64), ++ REG_SEQ0(CMN_REG(004e), 0x14), ++ REG_SEQ0(CMN_REG(004f), 0x00), ++ REG_SEQ0(CMN_REG(0050), 0x00), ++ REG_SEQ0(CMN_REG(005d), 0x0c), ++ REG_SEQ0(CMN_REG(005f), 0x01), ++ REG_SEQ0(CMN_REG(006b), 0x04), ++ REG_SEQ0(CMN_REG(0073), 0x30), ++ REG_SEQ0(CMN_REG(0074), 0x00), ++ REG_SEQ0(CMN_REG(0075), 0x20), ++ REG_SEQ0(CMN_REG(0076), 0x30), ++ REG_SEQ0(CMN_REG(0077), 0x08), ++ REG_SEQ0(CMN_REG(0078), 0x0c), ++ REG_SEQ0(CMN_REG(0079), 0x00), ++ REG_SEQ0(CMN_REG(007b), 0x00), ++ REG_SEQ0(CMN_REG(007c), 0x00), ++ REG_SEQ0(CMN_REG(007d), 0x00), ++ REG_SEQ0(CMN_REG(007e), 0x00), ++ REG_SEQ0(CMN_REG(007f), 0x00), ++ REG_SEQ0(CMN_REG(0080), 0x00), ++ REG_SEQ0(CMN_REG(0081), 0x09), ++ REG_SEQ0(CMN_REG(0082), 0x04), ++ REG_SEQ0(CMN_REG(0083), 0x24), ++ REG_SEQ0(CMN_REG(0084), 0x20), ++ REG_SEQ0(CMN_REG(0085), 0x03), ++ REG_SEQ0(CMN_REG(0086), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x0c), ++ REG_SEQ0(CMN_REG(008a), 0x55), ++ REG_SEQ0(CMN_REG(008b), 0x25), ++ REG_SEQ0(CMN_REG(008c), 0x2c), ++ REG_SEQ0(CMN_REG(008d), 0x22), ++ REG_SEQ0(CMN_REG(008e), 0x14), ++ REG_SEQ0(CMN_REG(008f), 0x20), ++ REG_SEQ0(CMN_REG(0090), 0x00), ++ REG_SEQ0(CMN_REG(0091), 0x00), ++ REG_SEQ0(CMN_REG(0092), 0x00), ++ REG_SEQ0(CMN_REG(0093), 0x00), ++ REG_SEQ0(CMN_REG(009a), 0x11), ++ REG_SEQ0(CMN_REG(009b), 0x10), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_cmn_init_seq[] = { ++ REG_SEQ0(CMN_REG(0008), 0x00), ++ REG_SEQ0(CMN_REG(0011), 0x01), ++ REG_SEQ0(CMN_REG(0017), 0x20), ++ REG_SEQ0(CMN_REG(001e), 0x14), ++ REG_SEQ0(CMN_REG(0020), 0x00), ++ REG_SEQ0(CMN_REG(0021), 0x00), ++ 
REG_SEQ0(CMN_REG(0022), 0x11), ++ REG_SEQ0(CMN_REG(0023), 0x00), ++ REG_SEQ0(CMN_REG(0024), 0x00), ++ REG_SEQ0(CMN_REG(0025), 0x53), ++ REG_SEQ0(CMN_REG(0026), 0x00), ++ REG_SEQ0(CMN_REG(0027), 0x00), ++ REG_SEQ0(CMN_REG(0028), 0x01), ++ REG_SEQ0(CMN_REG(002a), 0x00), ++ REG_SEQ0(CMN_REG(002b), 0x00), ++ REG_SEQ0(CMN_REG(002c), 0x00), ++ REG_SEQ0(CMN_REG(002d), 0x00), ++ REG_SEQ0(CMN_REG(002e), 0x04), ++ REG_SEQ0(CMN_REG(002f), 0x00), ++ REG_SEQ0(CMN_REG(0030), 0x20), ++ REG_SEQ0(CMN_REG(0031), 0x30), ++ REG_SEQ0(CMN_REG(0032), 0x0b), ++ REG_SEQ0(CMN_REG(0033), 0x23), ++ REG_SEQ0(CMN_REG(0034), 0x00), ++ REG_SEQ0(CMN_REG(003d), 0x40), ++ REG_SEQ0(CMN_REG(0042), 0x78), ++ REG_SEQ0(CMN_REG(004e), 0x34), ++ REG_SEQ0(CMN_REG(005c), 0x25), ++ REG_SEQ0(CMN_REG(005e), 0x4f), ++ REG_SEQ0(CMN_REG(0074), 0x04), ++ REG_SEQ0(CMN_REG(0081), 0x01), ++ REG_SEQ0(CMN_REG(0087), 0x04), ++ REG_SEQ0(CMN_REG(0089), 0x00), ++ REG_SEQ0(CMN_REG(0095), 0x00), ++ REG_SEQ0(CMN_REG(0097), 0x02), ++ REG_SEQ0(CMN_REG(0099), 0x04), ++ REG_SEQ0(CMN_REG(009b), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_sb_init_seq[] = { ++ REG_SEQ0(SB_REG(0114), 0x00), ++ REG_SEQ0(SB_REG(0115), 0x00), ++ REG_SEQ0(SB_REG(0116), 0x00), ++ REG_SEQ0(SB_REG(0117), 0x00), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_highbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x00), ++ REG_SEQ0(LNTOP_REG(0202), 0x00), ++ REG_SEQ0(LNTOP_REG(0203), 0x0f), ++ REG_SEQ0(LNTOP_REG(0204), 0xff), ++ REG_SEQ0(LNTOP_REG(0205), 0xff), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lntop_lowbr_seq[] = { ++ REG_SEQ0(LNTOP_REG(0201), 0x07), ++ REG_SEQ0(LNTOP_REG(0202), 0xc1), ++ REG_SEQ0(LNTOP_REG(0203), 0xf0), ++ REG_SEQ0(LNTOP_REG(0204), 0x7c), ++ REG_SEQ0(LNTOP_REG(0205), 0x1f), ++}; ++ ++static const struct reg_sequence rk_hdtpx_common_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0303), 0x0c), ++ REG_SEQ0(LANE_REG(0307), 0x20), ++ REG_SEQ0(LANE_REG(030a), 0x17), ++ REG_SEQ0(LANE_REG(030b), 0x77), ++ REG_SEQ0(LANE_REG(030c), 0x77), ++ REG_SEQ0(LANE_REG(030d), 0x77), ++ REG_SEQ0(LANE_REG(030e), 0x38), ++ REG_SEQ0(LANE_REG(0310), 0x03), ++ REG_SEQ0(LANE_REG(0311), 0x0f), ++ REG_SEQ0(LANE_REG(0316), 0x02), ++ REG_SEQ0(LANE_REG(031b), 0x01), ++ REG_SEQ0(LANE_REG(031f), 0x15), ++ REG_SEQ0(LANE_REG(0320), 0xa0), ++ REG_SEQ0(LANE_REG(0403), 0x0c), ++ REG_SEQ0(LANE_REG(0407), 0x20), ++ REG_SEQ0(LANE_REG(040a), 0x17), ++ REG_SEQ0(LANE_REG(040b), 0x77), ++ REG_SEQ0(LANE_REG(040c), 0x77), ++ REG_SEQ0(LANE_REG(040d), 0x77), ++ REG_SEQ0(LANE_REG(040e), 0x38), ++ REG_SEQ0(LANE_REG(0410), 0x03), ++ REG_SEQ0(LANE_REG(0411), 0x0f), ++ REG_SEQ0(LANE_REG(0416), 0x02), ++ REG_SEQ0(LANE_REG(041b), 0x01), ++ REG_SEQ0(LANE_REG(041f), 0x15), ++ REG_SEQ0(LANE_REG(0420), 0xa0), ++ REG_SEQ0(LANE_REG(0503), 0x0c), ++ REG_SEQ0(LANE_REG(0507), 0x20), ++ REG_SEQ0(LANE_REG(050a), 0x17), ++ REG_SEQ0(LANE_REG(050b), 0x77), ++ REG_SEQ0(LANE_REG(050c), 0x77), ++ REG_SEQ0(LANE_REG(050d), 0x77), ++ REG_SEQ0(LANE_REG(050e), 0x38), ++ REG_SEQ0(LANE_REG(0510), 0x03), ++ REG_SEQ0(LANE_REG(0511), 0x0f), ++ REG_SEQ0(LANE_REG(0516), 0x02), ++ REG_SEQ0(LANE_REG(051b), 0x01), ++ REG_SEQ0(LANE_REG(051f), 0x15), ++ REG_SEQ0(LANE_REG(0520), 0xa0), ++ REG_SEQ0(LANE_REG(0603), 0x0c), ++ REG_SEQ0(LANE_REG(0607), 0x20), ++ REG_SEQ0(LANE_REG(060a), 0x17), ++ REG_SEQ0(LANE_REG(060b), 0x77), ++ REG_SEQ0(LANE_REG(060c), 0x77), ++ REG_SEQ0(LANE_REG(060d), 0x77), ++ REG_SEQ0(LANE_REG(060e), 0x38), ++ REG_SEQ0(LANE_REG(0610), 0x03), ++ REG_SEQ0(LANE_REG(0611), 0x0f), ++ 
REG_SEQ0(LANE_REG(0616), 0x02), ++ REG_SEQ0(LANE_REG(061b), 0x01), ++ REG_SEQ0(LANE_REG(061f), 0x15), ++ REG_SEQ0(LANE_REG(0620), 0xa0), ++}; ++ ++static const struct reg_sequence rk_hdtpx_tmds_lane_init_seq[] = { ++ REG_SEQ0(LANE_REG(0312), 0x00), ++ REG_SEQ0(LANE_REG(031e), 0x00), ++ REG_SEQ0(LANE_REG(0412), 0x00), ++ REG_SEQ0(LANE_REG(041e), 0x00), ++ REG_SEQ0(LANE_REG(0512), 0x00), ++ REG_SEQ0(LANE_REG(051e), 0x00), ++ REG_SEQ0(LANE_REG(0612), 0x00), ++ REG_SEQ0(LANE_REG(061e), 0x08), ++ REG_SEQ0(LANE_REG(0303), 0x2f), ++ REG_SEQ0(LANE_REG(0403), 0x2f), ++ REG_SEQ0(LANE_REG(0503), 0x2f), ++ REG_SEQ0(LANE_REG(0603), 0x2f), ++ REG_SEQ0(LANE_REG(0305), 0x03), ++ REG_SEQ0(LANE_REG(0405), 0x03), ++ REG_SEQ0(LANE_REG(0505), 0x03), ++ REG_SEQ0(LANE_REG(0605), 0x03), ++ REG_SEQ0(LANE_REG(0306), 0x1c), ++ REG_SEQ0(LANE_REG(0406), 0x1c), ++ REG_SEQ0(LANE_REG(0506), 0x1c), ++ REG_SEQ0(LANE_REG(0606), 0x1c), ++}; ++ ++static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) ++{ ++ switch (reg) { ++ case 0x0000 ... 0x029c: ++ case 0x0400 ... 0x04a4: ++ case 0x0800 ... 0x08a4: ++ case 0x0c00 ... 0x0cb4: ++ case 0x1000 ... 0x10b4: ++ case 0x1400 ... 0x14b4: ++ case 0x1800 ... 0x18b4: ++ return true; ++ } ++ ++ return false; ++} ++ ++static const struct regmap_config rk_hdptx_phy_regmap_config = { ++ .reg_bits = 32, ++ .reg_stride = 4, ++ .val_bits = 32, ++ .writeable_reg = rk_hdptx_phy_is_rw_reg, ++ .readable_reg = rk_hdptx_phy_is_rw_reg, ++ .fast_io = true, ++ .max_register = 0x18b4, ++}; ++ ++#define rk_hdptx_multi_reg_write(hdptx, seq) \ ++ regmap_multi_reg_write((hdptx)->regmap, seq, ARRAY_SIZE(seq)) ++ ++static void rk_hdptx_pre_power_up(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 25); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static int rk_hdptx_post_enable_lane(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ reset_control_deassert(hdptx->rsts[RST_LANE].rstc); ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ (val & HDPTX_O_PHY_RDY) && ++ (val & HDPTX_O_PLL_LOCK_DONE), ++ 100, 5000); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY lane lock: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY lane locked\n"); ++ ++ return 0; ++} ++ ++static int rk_hdptx_post_enable_pll(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ int ret; ++ ++ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | ++ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ usleep_range(10, 15); ++ val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++ ++ usleep_range(10, 15); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ ++ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, ++ val & HDPTX_O_PHY_CLK_RDY, 20, 400); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to get PHY clk ready: %d\n", ret); ++ return ret; ++ } ++ ++ dev_dbg(hdptx->dev, "PHY clk ready\n"); ++ ++ 
return 0; ++} ++ ++static void rk_hdptx_phy_disable(struct rk_hdptx_phy *hdptx) ++{ ++ u32 val; ++ ++ /* reset phy and apb, or phy locked flag may keep 1 */ ++ reset_control_assert(hdptx->rsts[RST_PHY].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_PHY].rstc); ++ ++ reset_control_assert(hdptx->rsts[RST_APB].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ ++ regmap_write(hdptx->regmap, LANE_REG(0300), 0x82); ++ regmap_write(hdptx->regmap, SB_REG(010f), 0xc1); ++ regmap_write(hdptx->regmap, SB_REG(0110), 0x1); ++ regmap_write(hdptx->regmap, LANE_REG(0301), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0401), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0501), 0x80); ++ regmap_write(hdptx->regmap, LANE_REG(0601), 0x80); ++ ++ reset_control_assert(hdptx->rsts[RST_LANE].rstc); ++ reset_control_assert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_assert(hdptx->rsts[RST_INIT].rstc); ++ ++ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; ++ regmap_write(hdptx->grf, GRF_HDPTX_CON0, val); ++} ++ ++static bool rk_hdptx_phy_clk_pll_calc(unsigned int data_rate, ++ struct ropll_config *cfg) ++{ ++ const unsigned int fout = data_rate / 2, fref = 24000; ++ unsigned long k = 0, lc, k_sub, lc_sub; ++ unsigned int fvco, sdc; ++ u32 mdiv, sdiv, n = 8; ++ ++ if (fout > 0xfffffff) ++ return false; ++ ++ for (sdiv = 16; sdiv >= 1; sdiv--) { ++ if (sdiv % 2 && sdiv != 1) ++ continue; ++ ++ fvco = fout * sdiv; ++ ++ if (fvco < 2000000 || fvco > 4000000) ++ continue; ++ ++ mdiv = DIV_ROUND_UP(fvco, fref); ++ if (mdiv < 20 || mdiv > 255) ++ continue; ++ ++ if (fref * mdiv - fvco) { ++ for (sdc = 264000; sdc <= 750000; sdc += fref) ++ if (sdc * n > fref * mdiv) ++ break; ++ ++ if (sdc > 750000) ++ continue; ++ ++ rational_best_approximation(fref * mdiv - fvco, ++ sdc / 16, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k, &lc); ++ ++ rational_best_approximation(sdc * n - fref * mdiv, ++ sdc, ++ GENMASK(6, 0), ++ GENMASK(7, 0), ++ &k_sub, &lc_sub); ++ } ++ ++ break; ++ } ++ ++ if (sdiv < 1) ++ return false; ++ ++ if (cfg) { ++ cfg->pms_mdiv = mdiv; ++ cfg->pms_mdiv_afc = mdiv; ++ cfg->pms_pdiv = 1; ++ cfg->pms_refdiv = 1; ++ cfg->pms_sdiv = sdiv - 1; ++ ++ cfg->sdm_en = k > 0 ? 
1 : 0; ++ if (cfg->sdm_en) { ++ cfg->sdm_deno = lc; ++ cfg->sdm_num_sign = 1; ++ cfg->sdm_num = k; ++ cfg->sdc_n = n - 3; ++ cfg->sdc_num = k_sub; ++ cfg->sdc_deno = lc_sub; ++ } ++ } ++ ++ return true; ++} ++ ++static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ const struct ropll_config *cfg = NULL; ++ struct ropll_config rc = {0}; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) ++ if (rate == ropll_tmds_cfg[i].bit_rate) { ++ cfg = &ropll_tmds_cfg[i]; ++ break; ++ } ++ ++ if (!cfg) { ++ if (rk_hdptx_phy_clk_pll_calc(rate, &rc)) { ++ cfg = &rc; ++ } else { ++ dev_err(hdptx->dev, "%s cannot find pll cfg\n", __func__); ++ return -EINVAL; ++ } ++ } ++ ++ dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u, sdm_en=%u, k_sign=%u, k=%u, lc=%u\n", ++ cfg->pms_mdiv, cfg->pms_sdiv + 1, cfg->sdm_en, ++ cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno); ++ ++ rk_hdptx_pre_power_up(hdptx); ++ ++ reset_control_assert(hdptx->rsts[RST_ROPLL].rstc); ++ usleep_range(20, 30); ++ reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_cmn_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_cmn_init_seq); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0051), cfg->pms_mdiv); ++ regmap_write(hdptx->regmap, CMN_REG(0055), cfg->pms_mdiv_afc); ++ regmap_write(hdptx->regmap, CMN_REG(0059), ++ (cfg->pms_pdiv << 4) | cfg->pms_refdiv); ++ regmap_write(hdptx->regmap, CMN_REG(005a), cfg->pms_sdiv << 4); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), ROPLL_SDM_EN_MASK, ++ FIELD_PREP(ROPLL_SDM_EN_MASK, cfg->sdm_en)); ++ if (!cfg->sdm_en) ++ regmap_update_bits(hdptx->regmap, CMN_REG(005e), 0xf, 0); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0064), ROPLL_SDM_NUM_SIGN_RBR_MASK, ++ FIELD_PREP(ROPLL_SDM_NUM_SIGN_RBR_MASK, cfg->sdm_num_sign)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(0060), cfg->sdm_deno); ++ regmap_write(hdptx->regmap, CMN_REG(0065), cfg->sdm_num); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0069), ROPLL_SDC_N_RBR_MASK, ++ FIELD_PREP(ROPLL_SDC_N_RBR_MASK, cfg->sdc_n)); ++ ++ regmap_write(hdptx->regmap, CMN_REG(006c), cfg->sdc_num); ++ regmap_write(hdptx->regmap, CMN_REG(0070), cfg->sdc_deno); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_POSTDIV_SEL_MASK, ++ FIELD_PREP(PLL_PCG_POSTDIV_SEL_MASK, cfg->pms_sdiv)); ++ ++ regmap_update_bits(hdptx->regmap, CMN_REG(0086), PLL_PCG_CLK_EN, ++ PLL_PCG_CLK_EN); ++ ++ return rk_hdptx_post_enable_pll(hdptx); ++} ++ ++static int rk_hdptx_ropll_tmds_mode_config(struct rk_hdptx_phy *hdptx, ++ unsigned int rate) ++{ ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret) ++ return ret; ++ ++ if (!(val & HDPTX_O_PLL_LOCK_DONE)) { ++ ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, rate); ++ if (ret) ++ return ret; ++ } ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_sb_init_seq); ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0200), 0x06); ++ ++ if (rate >= 3400000) { ++ /* For 1/40 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_highbr_seq); ++ } else { ++ /* For 1/10 bitrate clk */ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lntop_lowbr_seq); ++ } ++ ++ regmap_write(hdptx->regmap, LNTOP_REG(0206), 0x07); ++ regmap_write(hdptx->regmap, LNTOP_REG(0207), 0x0f); ++ ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_common_lane_init_seq); ++ rk_hdptx_multi_reg_write(hdptx, rk_hdtpx_tmds_lane_init_seq); ++ ++ return rk_hdptx_post_enable_lane(hdptx); ++} ++ ++static int 
rk_hdptx_phy_power_on(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ int ret, bus_width = phy_get_bus_width(hdptx->phy); ++ /* ++ * FIXME: Temporary workaround to pass pixel_clk_rate ++ * from the HDMI bridge driver until phy_configure_opts_hdmi ++ * becomes available in the PHY API. ++ */ ++ unsigned int rate = bus_width & 0xfffffff; ++ ++ dev_dbg(hdptx->dev, "%s bus_width=%x rate=%u\n", ++ __func__, bus_width, rate); ++ ++ ret = pm_runtime_resume_and_get(hdptx->dev); ++ if (ret) { ++ dev_err(hdptx->dev, "Failed to resume phy: %d\n", ret); ++ return ret; ++ } ++ ++ ret = rk_hdptx_ropll_tmds_mode_config(hdptx, rate); ++ if (ret) ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_power_off(struct phy *phy) ++{ ++ struct rk_hdptx_phy *hdptx = phy_get_drvdata(phy); ++ u32 val; ++ int ret; ++ ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret == 0 && (val & HDPTX_O_PLL_LOCK_DONE)) ++ rk_hdptx_phy_disable(hdptx); ++ ++ pm_runtime_put(hdptx->dev); ++ ++ return ret; ++} ++ ++static const struct phy_ops rk_hdptx_phy_ops = { ++ .power_on = rk_hdptx_phy_power_on, ++ .power_off = rk_hdptx_phy_power_off, ++ .owner = THIS_MODULE, ++}; ++ ++static int rk_hdptx_phy_runtime_suspend(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ ++ clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks); ++ ++ return 0; ++} ++ ++static int rk_hdptx_phy_runtime_resume(struct device *dev) ++{ ++ struct rk_hdptx_phy *hdptx = dev_get_drvdata(dev); ++ int ret; ++ ++ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); ++ if (ret) ++ dev_err(hdptx->dev, "Failed to enable clocks: %d\n", ret); ++ ++ return ret; ++} ++ ++static int rk_hdptx_phy_probe(struct platform_device *pdev) ++{ ++ struct phy_provider *phy_provider; ++ struct device *dev = &pdev->dev; ++ struct rk_hdptx_phy *hdptx; ++ void __iomem *regs; ++ int ret; ++ ++ hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL); ++ if (!hdptx) ++ return -ENOMEM; ++ ++ hdptx->dev = dev; ++ ++ regs = devm_platform_ioremap_resource(pdev, 0); ++ if (IS_ERR(regs)) ++ return dev_err_probe(dev, PTR_ERR(regs), ++ "Failed to ioremap resource\n"); ++ ++ ret = devm_clk_bulk_get_all(dev, &hdptx->clks); ++ if (ret < 0) ++ return dev_err_probe(dev, ret, "Failed to get clocks\n"); ++ if (ret == 0) ++ return dev_err_probe(dev, -EINVAL, "Missing clocks\n"); ++ ++ hdptx->nr_clks = ret; ++ ++ hdptx->regmap = devm_regmap_init_mmio(dev, regs, ++ &rk_hdptx_phy_regmap_config); ++ if (IS_ERR(hdptx->regmap)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->regmap), ++ "Failed to init regmap\n"); ++ ++ hdptx->rsts[RST_PHY].id = "phy"; ++ hdptx->rsts[RST_APB].id = "apb"; ++ hdptx->rsts[RST_INIT].id = "init"; ++ hdptx->rsts[RST_CMN].id = "cmn"; ++ hdptx->rsts[RST_LANE].id = "lane"; ++ hdptx->rsts[RST_ROPLL].id = "ropll"; ++ hdptx->rsts[RST_LCPLL].id = "lcpll"; ++ ++ ret = devm_reset_control_bulk_get_exclusive(dev, RST_MAX, hdptx->rsts); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to get resets\n"); ++ ++ hdptx->grf = syscon_regmap_lookup_by_phandle(dev->of_node, ++ "rockchip,grf"); ++ if (IS_ERR(hdptx->grf)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->grf), ++ "Could not get GRF syscon\n"); ++ ++ hdptx->phy = devm_phy_create(dev, NULL, &rk_hdptx_phy_ops); ++ if (IS_ERR(hdptx->phy)) ++ return dev_err_probe(dev, PTR_ERR(hdptx->phy), ++ "Failed to create HDMI PHY\n"); ++ ++ platform_set_drvdata(pdev, hdptx); ++ phy_set_drvdata(hdptx->phy, hdptx); ++ phy_set_bus_width(hdptx->phy, 8); ++ 
++ ret = devm_pm_runtime_enable(dev); ++ if (ret) ++ return dev_err_probe(dev, ret, "Failed to enable runtime PM\n"); ++ ++ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); ++ if (IS_ERR(phy_provider)) ++ return dev_err_probe(dev, PTR_ERR(phy_provider), ++ "Failed to register PHY provider\n"); ++ ++ reset_control_deassert(hdptx->rsts[RST_APB].rstc); ++ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); ++ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); ++ ++ return 0; ++} ++ ++static const struct dev_pm_ops rk_hdptx_phy_pm_ops = { ++ RUNTIME_PM_OPS(rk_hdptx_phy_runtime_suspend, ++ rk_hdptx_phy_runtime_resume, NULL) ++}; ++ ++static const struct of_device_id rk_hdptx_phy_of_match[] = { ++ { .compatible = "rockchip,rk3588-hdptx-phy", }, ++ {} ++}; ++MODULE_DEVICE_TABLE(of, rk_hdptx_phy_of_match); ++ ++static struct platform_driver rk_hdptx_phy_driver = { ++ .probe = rk_hdptx_phy_probe, ++ .driver = { ++ .name = "rockchip-hdptx-phy", ++ .pm = &rk_hdptx_phy_pm_ops, ++ .of_match_table = rk_hdptx_phy_of_match, ++ }, ++}; ++module_platform_driver(rk_hdptx_phy_driver); ++ ++MODULE_AUTHOR("Algea Cao "); ++MODULE_AUTHOR("Cristian Ciocaltea "); ++MODULE_DESCRIPTION("Samsung HDMI/eDP Transmitter Combo PHY Driver"); ++MODULE_LICENSE("GPL"); +-- +2.42.0 + + +From ff584be13784e7fc69c5ff1e2cf3598548c8afc0 Mon Sep 17 00:00:00 2001 +From: Alexander Stein +Date: Wed, 17 Jan 2024 09:32:06 +0100 +Subject: [PATCH 30/71] [MERGED] of: property: Make 'no port node found' output + a debug message + +There are cases where an unavailable port is not an error, making this +error message a false-positive. Since commit d56de8c9a17d8 ("usb: typec: +tcpm: try to get role switch from tcpc fwnode") the role switch is tried +on the port dev first and tcpc fwnode afterwards. If using the latter +bindings getting from port dev fails every time. The kernel log is flooded +with the messages like: + OF: graph: no port node found in /soc@0/bus@42000000/i2c@42530000/usb-typec@50 +Silence this message by making it a debug message. + +Signed-off-by: Alexander Stein +Link: https://lore.kernel.org/r/20240117083206.2901534-1-alexander.stein@ew.tq-group.com +[picked up from upstream as alternative to my fix for the TypeC issue] +Signed-off-by: Sebastian Reichel +--- + drivers/of/property.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/of/property.c b/drivers/of/property.c +index fa8cd33be131..b0e7e506955f 100644 +--- a/drivers/of/property.c ++++ b/drivers/of/property.c +@@ -665,7 +665,7 @@ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, + of_node_put(node); + + if (!port) { +- pr_err("graph: no port node found in %pOF\n", parent); ++ pr_debug("graph: no port node found in %pOF\n", parent); + return NULL; + } + } else { +-- +2.42.0 + + +From dca4bbdc13832755e85ce9f22396a44f4d1ba7d2 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 24 Oct 2023 16:09:35 +0200 -Subject: [PATCH 03/81] math.h: add DIV_ROUND_UP_NO_OVERFLOW +Subject: [PATCH 31/71] math.h: add DIV_ROUND_UP_NO_OVERFLOW Add a new DIV_ROUND_UP helper, which cannot overflow when big numbers are being used. 
@@ -301,10 +17080,10 @@ index dd4152711de7..f80bfb375ab9 100644 2.42.0 -From a30b72e251f17af0c76f8f199cb064e9ab69c93c Mon Sep 17 00:00:00 2001 +From 3223961d24b3e1ac452e8eae5021e72a6a95d599 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 24 Oct 2023 16:13:50 +0200 -Subject: [PATCH 04/81] clk: divider: Fix divisor masking on 64 bit platforms +Subject: [PATCH 32/71] clk: divider: Fix divisor masking on 64 bit platforms The clock framework handles clock rates as "unsigned long", so u32 on 32-bit architectures and u64 on 64-bit architectures. @@ -366,10 +17145,10 @@ index a2c2b5203b0a..94b4fb66a60f 100644 2.42.0 -From ac3cfb90de661baecd20afe96999bbbf60117ad5 Mon Sep 17 00:00:00 2001 +From 98a2896e433c36afd3c3379d5ac32c4eb5a16733 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 24 Oct 2023 18:09:57 +0200 -Subject: [PATCH 05/81] clk: composite: replace open-coded abs_diff() +Subject: [PATCH 33/71] clk: composite: replace open-coded abs_diff() Replace the open coded abs_diff() with the existing helper function. @@ -407,96 +17186,10 @@ index 66759fe28fad..478a4e594336 100644 2.42.0 -From e59f2fb5ec906b3adf998ce62d34c7754f52a19d Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Fri, 9 Feb 2024 18:13:51 +0100 -Subject: [PATCH 06/81] dt-bindings: soc: rockchip: add clock to RK3588 VO grf - -The RK3588 VO GRF needs a clock. This adds the clock to the allowed -properties, makes it mandatory for the RK3588 VO grf and disallows it -for any other Rockchip grf. - -Acked-by: Conor Dooley -Signed-off-by: Sebastian Reichel ---- - .../devicetree/bindings/soc/rockchip/grf.yaml | 19 +++++++++++++++++++ - 1 file changed, 19 insertions(+) - -diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -index 9793ea6f0fe6..20bc1f46384c 100644 ---- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -@@ -66,6 +66,9 @@ properties: - reg: - maxItems: 1 - -+ clocks: -+ maxItems: 1 -+ - "#address-cells": - const: 1 - -@@ -248,6 +251,22 @@ allOf: - - unevaluatedProperties: false - -+ - if: -+ properties: -+ compatible: -+ contains: -+ enum: -+ - rockchip,rk3588-vo-grf -+ -+ then: -+ required: -+ - clocks -+ -+ else: -+ properties: -+ clocks: false -+ -+ - examples: - - | - #include --- -2.42.0 - - -From 6e59957f0aede23e597d5d198d263cd35692dfbc Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Fri, 9 Feb 2024 18:15:45 +0100 -Subject: [PATCH 07/81] dt-bindings: soc: rockchip: add rk3588 USB3 syscon - -RK3588 USB3 support requires the GRF for USB and USBDP PHY. 
- -Acked-by: Conor Dooley -Signed-off-by: Sebastian Reichel ---- - Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -index 20bc1f46384c..12e7a78f7f6b 100644 ---- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -@@ -28,6 +28,8 @@ properties: - - rockchip,rk3588-sys-grf - - rockchip,rk3588-pcie3-phy-grf - - rockchip,rk3588-pcie3-pipe-grf -+ - rockchip,rk3588-usb-grf -+ - rockchip,rk3588-usbdpphy-grf - - rockchip,rk3588-vo-grf - - rockchip,rk3588-vop-grf - - rockchip,rv1108-usbgrf --- -2.42.0 - - -From 7e1be134214d9719cb4a5334f7f64278d5e00dad Mon Sep 17 00:00:00 2001 +From 5d30d35190def2882fca4806b1b431a64bb35331 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Apr 2023 17:38:57 +0200 -Subject: [PATCH 08/81] dt-bindings: phy: add rockchip usbdp combo phy document +Subject: [PATCH 34/71] dt-bindings: phy: add rockchip usbdp combo phy document Add device tree binding document for Rockchip USBDP Combo PHY with Samsung IP block. @@ -668,10 +17361,10 @@ index 000000000000..1f1f8863b80d 2.42.0 -From 58db879e9dfba8ffb1985a986b0f624ae340c626 Mon Sep 17 00:00:00 2001 +From b6977eb19c6c49f7afd6277fa673996defff069e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Apr 2023 15:55:54 +0200 -Subject: [PATCH 09/81] phy: rockchip: add usbdp combo phy driver +Subject: [PATCH 35/71] phy: rockchip: add usbdp combo phy driver This adds a new USBDP combo PHY with Samsung IP block driver. @@ -701,10 +17394,10 @@ Signed-off-by: Sebastian Reichel create mode 100644 drivers/phy/rockchip/phy-rockchip-usbdp.c diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig -index 94360fc96a6f..d21b458c1d18 100644 +index a34f67bb7e61..c3d62243b474 100644 --- a/drivers/phy/rockchip/Kconfig +++ b/drivers/phy/rockchip/Kconfig -@@ -107,3 +107,15 @@ config PHY_ROCKCHIP_USB +@@ -115,3 +115,15 @@ config PHY_ROCKCHIP_USB select GENERIC_PHY help Enable this to support the Rockchip USB 2.0 PHY. @@ -721,10 +17414,10 @@ index 94360fc96a6f..d21b458c1d18 100644 + To compile this driver as a module, choose M here: the module + will be called phy-rockchip-usbdp diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile -index 7eab129230d1..25d2e1355db7 100644 +index 3d911304e654..010a824e32ce 100644 --- a/drivers/phy/rockchip/Makefile +++ b/drivers/phy/rockchip/Makefile -@@ -11,3 +11,4 @@ obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o +@@ -12,3 +12,4 @@ obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o @@ -2351,10 +19044,10 @@ index 000000000000..1f3b7955c9f3 2.42.0 -From d402f520868c251196d22f4bbdaaf70c8d7daf0c Mon Sep 17 00:00:00 2001 +From 17fe499cd973f6af403dbba6bef2c45a4a24ac7d Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 5 Jan 2024 18:38:43 +0100 -Subject: [PATCH 10/81] arm64: defconfig: enable Rockchip Samsung USBDP PHY +Subject: [PATCH 36/71] arm64: defconfig: enable Rockchip Samsung USBDP PHY The USBDP Phy is used by RK3588 to handle the Dual-Role USB3 controllers. 
The Phy also supports Displayport Alt-Mode, but @@ -2366,11 +19059,11 @@ Signed-off-by: Sebastian Reichel 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig -index e6cf3e5d63c3..07890b86777e 100644 +index 134dce860641..ab24a68ebada 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig -@@ -1492,6 +1492,7 @@ CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY=m - CONFIG_PHY_ROCKCHIP_PCIE=m +@@ -1493,6 +1493,7 @@ CONFIG_PHY_ROCKCHIP_PCIE=m + CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX=m CONFIG_PHY_ROCKCHIP_SNPS_PCIE3=y CONFIG_PHY_ROCKCHIP_TYPEC=y +CONFIG_PHY_ROCKCHIP_USBDP=m @@ -2381,10 +19074,10 @@ index e6cf3e5d63c3..07890b86777e 100644 2.42.0 -From 43a05a87564338c831e111e961e890b9909573f5 Mon Sep 17 00:00:00 2001 +From 3ca1ee771e45ae9a68b844d0cde9316fcfa233b7 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 13 Feb 2024 15:12:27 +0100 -Subject: [PATCH 11/81] arm64: dts: rockchip: reorder usb2phy properties for +Subject: [PATCH 37/71] arm64: dts: rockchip: reorder usb2phy properties for rk3588 Reorder common DT properties alphabetically for usb2phy, according @@ -2396,7 +19089,7 @@ Signed-off-by: Sebastian Reichel 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 36b1b7acfe6a..125689d7c5c2 100644 +index 3a15a30543c3..20eeb180caae 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -545,13 +545,13 @@ usb2phy2_grf: syscon@fd5d8000 { @@ -2439,10 +19132,10 @@ index 36b1b7acfe6a..125689d7c5c2 100644 2.42.0 -From 6dc807802748464d8fb9144de779436b9a6e5861 Mon Sep 17 00:00:00 2001 +From 078990d092d8ee0c76f71365cbe5fe1301860230 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Mon, 12 Feb 2024 19:08:27 +0100 -Subject: [PATCH 12/81] arm64: dts: rockchip: fix usb2phy nodename for rk3588 +Subject: [PATCH 38/71] arm64: dts: rockchip: fix usb2phy nodename for rk3588 usb2-phy should be named usb2phy according to the DT binding, so let's fix it up accordingly. @@ -2453,7 +19146,7 @@ Signed-off-by: Sebastian Reichel 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 125689d7c5c2..8708436dd545 100644 +index 20eeb180caae..14596948ce40 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -542,7 +542,7 @@ usb2phy2_grf: syscon@fd5d8000 { @@ -2478,10 +19171,10 @@ index 125689d7c5c2..8708436dd545 100644 2.42.0 -From 3c08eb875a1c64e0374cd63b92ad7404581ce90b Mon Sep 17 00:00:00 2001 +From 5343b8fa7352329f838671546de0a7f7030e055a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Apr 2023 17:49:04 +0200 -Subject: [PATCH 13/81] arm64: dts: rockchip: add USBDP phys on rk3588 +Subject: [PATCH 39/71] arm64: dts: rockchip: add USBDP phys on rk3588 Add both USB3-DisplayPort PHYs to RK3588 SoC DT. 
@@ -2562,7 +19255,7 @@ index 5519c1430cb7..4fdd047c9eb9 100644 compatible = "rockchip,rk3588-naneng-combphy"; reg = <0x0 0xfee10000 0x0 0x100>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 8708436dd545..069aef13fcd4 100644 +index 14596948ce40..b83fc2885087 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -516,11 +516,22 @@ vop_grf: syscon@fd5a4000 { @@ -2625,8 +19318,8 @@ index 8708436dd545..069aef13fcd4 100644 usb2phy2_grf: syscon@fd5d8000 { compatible = "rockchip,rk3588-usb2phy-grf", "syscon", "simple-mfd"; reg = <0x0 0xfd5d8000 0x0 0x4000>; -@@ -2360,6 +2401,28 @@ dmac2: dma-controller@fed10000 { - #dma-cells = <1>; +@@ -2381,6 +2422,28 @@ hdptxphy_hdmi0: phy@fed60000 { + status = "disabled"; }; + usbdp_phy0: phy@fed80000 { @@ -2658,10 +19351,10 @@ index 8708436dd545..069aef13fcd4 100644 2.42.0 -From d76891cecefde5c32131333cae230c726076f372 Mon Sep 17 00:00:00 2001 +From f54fc51c66c257f50bacbb850c279e03abddbbf2 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 18 Jul 2023 19:05:38 +0200 -Subject: [PATCH 14/81] arm64: dts: rockchip: add USB3 DRD controllers on +Subject: [PATCH 40/71] arm64: dts: rockchip: add USB3 DRD controllers on rk3588 Add both USB3 dual-role controllers to the RK3588 devicetree. @@ -2704,7 +19397,7 @@ index 4fdd047c9eb9..5984016b5f96 100644 compatible = "rockchip,rk3588-pcie3-phy-grf", "syscon"; reg = <0x0 0xfd5b8000 0x0 0x10000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 069aef13fcd4..7ac68ffc3f67 100644 +index b83fc2885087..bb0a3189421a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -436,6 +436,28 @@ scmi_shmem: sram@0 { @@ -2740,10 +19433,10 @@ index 069aef13fcd4..7ac68ffc3f67 100644 2.42.0 -From 10c2c10d1b4f39496f1e0245aa93072d76d2ce4e Mon Sep 17 00:00:00 2001 +From e0b810775f8783ae10419ca0bfbe71043c7fa8e5 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 26 Apr 2023 21:18:43 +0200 -Subject: [PATCH 15/81] arm64: dts: rockchip: add USB3 to rk3588-evb1 +Subject: [PATCH 41/71] arm64: dts: rockchip: add USB3 to rk3588-evb1 Add support for the board's USB3 connectors. It has 1x USB Type-A and 1x USB Type-C. 
@@ -2754,7 +19447,7 @@ Signed-off-by: Sebastian Reichel 1 file changed, 143 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index ac7c677b0fb9..0e82dbb4c5ff 100644 +index de30c2632b8e..c3746d3a9b1d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -9,6 +9,7 @@ @@ -2858,7 +19551,7 @@ index ac7c677b0fb9..0e82dbb4c5ff 100644 }; &pwm2 { -@@ -1040,6 +1113,22 @@ &sata0 { +@@ -1041,6 +1114,22 @@ &sata0 { status = "okay"; }; @@ -2881,7 +19574,7 @@ index ac7c677b0fb9..0e82dbb4c5ff 100644 &u2phy2 { status = "okay"; }; -@@ -1078,3 +1167,57 @@ &usb_host1_ehci { +@@ -1079,3 +1168,57 @@ &usb_host1_ehci { &usb_host1_ohci { status = "okay"; }; @@ -2943,10 +19636,10 @@ index ac7c677b0fb9..0e82dbb4c5ff 100644 2.42.0 -From 8868bd5cc4740658da184f06f5a889fb004aa753 Mon Sep 17 00:00:00 2001 +From 5044cdbb66f7fafc510acad652ee685798aa718e Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Jul 2023 16:30:46 +0200 -Subject: [PATCH 16/81] arm64: dts: rockchip: add upper USB3 port to rock-5a +Subject: [PATCH 42/71] arm64: dts: rockchip: add upper USB3 port to rock-5a Enable full support (XHCI, EHCI, OHCI) for the upper USB3 port from Radxa Rock 5 Model A. The lower one is already supported. @@ -3003,10 +19696,10 @@ index 2002fd0221fa..149058352f4e 100644 2.42.0 -From 08ff45a430f75167dc4848443b2392ad5ccf40d7 Mon Sep 17 00:00:00 2001 +From 0f1fe27c89d5b2feb2c249f72c715e8766e7add4 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Jul 2023 17:18:17 +0200 -Subject: [PATCH 17/81] arm64: dts: rockchip: add lower USB3 port to rock-5b +Subject: [PATCH 43/71] arm64: dts: rockchip: add lower USB3 port to rock-5b Enable full support (XHCI, EHCI, OHCI) for the lower USB3 port from Radxa Rock 5 Model B. The upper one is already supported. @@ -3062,58 +19755,10 @@ index a0e303c3a1dc..149bd44ffd1c 100644 2.42.0 -From 534eb1e880eff0d89b4762ec66c13a40ed328f62 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Fri, 28 Jul 2023 16:43:16 +0200 -Subject: [PATCH 18/81] usb: typec: tcpm: avoid graph warning - -When using a devicetree as described in commit d56de8c9a17d ("usb: -typec: tcpm: try to get role switch from tcpc fwnode"), the kernel -will print an error when probing the TCPM driver, which looks -similar to this: - -OF: graph: no port node found in /i2c@feac0000/usb-typec@22 - -This is a false positive, since the code first tries to find a ports -node for the device and only then checks the fwnode. Fix this by -swapping the order. - -Note, that this will now generate a error message for devicetrees with -a role-switch ports node directly in the TCPM node instead of in the -connectors sub-node, before falling back to the legacy behaviour. These -devicetrees generate warnings when being checked against the bindings, -and should be fixed. 
- -Fixes: d56de8c9a17d ("usb: typec: tcpm: try to get role switch from tcpc fwnode") -Signed-off-by: Sebastian Reichel ---- - drivers/usb/typec/tcpm/tcpm.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c -index 5945e3a2b0f7..99991cb666a8 100644 ---- a/drivers/usb/typec/tcpm/tcpm.c -+++ b/drivers/usb/typec/tcpm/tcpm.c -@@ -6831,9 +6831,9 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) - - port->partner_desc.identity = &port->partner_ident; - -- port->role_sw = usb_role_switch_get(port->dev); -+ port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); - if (!port->role_sw) -- port->role_sw = fwnode_usb_role_switch_get(tcpc->fwnode); -+ port->role_sw = usb_role_switch_get(port->dev); - if (IS_ERR(port->role_sw)) { - err = PTR_ERR(port->role_sw); - goto out_destroy_wq; --- -2.42.0 - - -From 8271dd6892fb183ee32629aed76da25cac331f18 Mon Sep 17 00:00:00 2001 +From 2e7502b6de1c3054380ac79268e9bcb2ec277004 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 25 Jul 2023 18:35:56 +0200 -Subject: [PATCH 19/81] [BROKEN] arm64: dts: rockchip: rk3588-rock5b: add USB-C +Subject: [PATCH 44/71] [BROKEN] arm64: dts: rockchip: rk3588-rock5b: add USB-C support Add support for using the Radxa Rock 5 Model B USB-C port for USB in @@ -3311,10 +19956,10 @@ index 149bd44ffd1c..41d2a0870d9f 100644 2.42.0 -From 2ef6d1736eadea23076b56fd8dd12d18ef68489b Mon Sep 17 00:00:00 2001 +From ddd6153c4076efe8f9d44759f9c042da7332219a Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 25 May 2023 19:45:02 +0200 -Subject: [PATCH 20/81] arm64: dts: rockchip: enable RK3588 tsadc by default +Subject: [PATCH 45/71] arm64: dts: rockchip: enable RK3588 tsadc by default Enable the thermal ADC for all boards. @@ -3324,10 +19969,10 @@ Signed-off-by: Sebastian Reichel 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 7ac68ffc3f67..088bbf842506 100644 +index bb0a3189421a..ce4fa00c4798 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -2288,7 +2288,6 @@ tsadc: tsadc@fec00000 { +@@ -2293,7 +2293,6 @@ tsadc: tsadc@fec00000 { pinctrl-1 = <&tsadc_shut>; pinctrl-names = "gpio", "otpout"; #thermal-sensor-cells = <1>; @@ -3339,10 +19984,10 @@ index 7ac68ffc3f67..088bbf842506 100644 2.42.0 -From f8a116a26ac78cf3f090cc98a8a6079b9a121230 Mon Sep 17 00:00:00 2001 +From 9c90c5032743a0419bf3fd2f914a24fd53101acd Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 18 Aug 2022 14:21:30 +0200 -Subject: [PATCH 21/81] cpufreq: rockchip: Introduce driver for rk3588 +Subject: [PATCH 46/71] cpufreq: rockchip: Introduce driver for rk3588 This is a heavily modified port from the downstream driver. Downstream used it for multiple rockchip generations, while @@ -3411,7 +20056,7 @@ index bd1e1357cef8..cfd35aa52043 100644 { .compatible = "st,stih418", }, diff --git a/drivers/cpufreq/rockchip-cpufreq.c b/drivers/cpufreq/rockchip-cpufreq.c new file mode 100644 -index 000000000000..0bf57ac85e60 +index 000000000000..9aaca8f3e782 --- /dev/null +++ b/drivers/cpufreq/rockchip-cpufreq.c @@ -0,0 +1,645 @@ @@ -3427,7 +20072,7 @@ index 000000000000..0bf57ac85e60 + * * handling of read margin registers + * + * Copyright (C) 2017 Fuzhou Rockchip Electronics Co., Ltd -+ * Copyright (C) 2023 Collabora Ltd. ++ * Copyright (C) 2023-2024 Collabora Ltd. 
+ */ + +#include @@ -3730,7 +20375,7 @@ index 000000000000..0bf57ac85e60 + int opp_table_token = -EINVAL; + struct device_node *np; + struct device *dev; -+ const char * const reg_names[] = { "cpu", "mem", NULL }; ++ const char * const reg_names[] = { "cpu", NULL }; + int ret = 0; + + dev = get_cpu_device(cpu); @@ -4064,10 +20709,90 @@ index 000000000000..0bf57ac85e60 2.42.0 -From 548913ac229838ae2f9ec33e27185db67ba07f4d Mon Sep 17 00:00:00 2001 +From 4ff28cd6204a6ee6ba950860a7cd4309c24f17b4 Mon Sep 17 00:00:00 2001 +From: Sebastian Reichel +Date: Wed, 31 Jan 2024 18:15:50 +0100 +Subject: [PATCH 47/71] arm64: dts: rockchip: rk3588-evb1: Couple CPU + regulators + +The RK3588 CPUs have two supply inputs: one supply for the logic and one +for the memory interface. On many platforms both supplies are handled by +the same regulator. + +Boards, which have separate regulators for each supply need them coupled +together. This is necessary when cpufreq support is added to avoid crashes. + +Signed-off-by: Sebastian Reichel +--- + arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +index c3746d3a9b1d..f40b3d251f4b 100644 +--- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts ++++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +@@ -865,6 +865,8 @@ vdd_cpu_big1_s0: dcdc-reg1 { + regulator-max-microvolt = <1050000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_big1_s0"; ++ regulator-coupled-with = <&vdd_cpu_big1_mem_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +@@ -877,6 +879,8 @@ vdd_cpu_big0_s0: dcdc-reg2 { + regulator-max-microvolt = <1050000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_big0_s0"; ++ regulator-coupled-with = <&vdd_cpu_big0_mem_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +@@ -889,6 +893,8 @@ vdd_cpu_lit_s0: dcdc-reg3 { + regulator-max-microvolt = <950000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_lit_s0"; ++ regulator-coupled-with = <&vdd_cpu_lit_mem_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +@@ -913,6 +919,8 @@ vdd_cpu_big1_mem_s0: dcdc-reg5 { + regulator-max-microvolt = <1050000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_big1_mem_s0"; ++ regulator-coupled-with = <&vdd_cpu_big1_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +@@ -926,6 +934,8 @@ vdd_cpu_big0_mem_s0: dcdc-reg6 { + regulator-max-microvolt = <1050000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_big0_mem_s0"; ++ regulator-coupled-with = <&vdd_cpu_big0_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +@@ -950,6 +960,8 @@ vdd_cpu_lit_mem_s0: dcdc-reg8 { + regulator-max-microvolt = <950000>; + regulator-ramp-delay = <12500>; + regulator-name = "vdd_cpu_lit_mem_s0"; ++ regulator-coupled-with = <&vdd_cpu_lit_s0>; ++ regulator-coupled-max-spread = <10000>; + regulator-state-mem { + regulator-off-in-suspend; + }; +-- +2.42.0 + + +From 90d710441ecf9db76ca9976f8de54f8cdfc48ba4 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 4 Apr 2023 17:30:46 +0200 -Subject: [PATCH 22/81] arm64: dts: rockchip: rk3588: add cpu frequency scaling +Subject: [PATCH 48/71] arm64: dts: 
rockchip: rk3588: add cpu frequency scaling support Add required bits for CPU frequency scaling to the Rockchip 3588 @@ -4078,11 +20803,11 @@ frequencies based on (among other things) silicon quality. Signed-off-by: Sebastian Reichel --- - arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 452 ++++++++++++++++++++++ - 1 file changed, 452 insertions(+) + arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 394 ++++++++++++++++++++++ + 1 file changed, 394 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 088bbf842506..2f3db2e466db 100644 +index ce4fa00c4798..e167949f8b9a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -10,6 +10,7 @@ @@ -4093,7 +20818,7 @@ index 088bbf842506..2f3db2e466db 100644 / { compatible = "rockchip,rk3588"; -@@ -50,6 +51,215 @@ aliases { +@@ -50,6 +51,157 @@ aliases { spi4 = &spi4; }; @@ -4103,51 +20828,43 @@ index 088bbf842506..2f3db2e466db 100644 + + opp-408000000 { + opp-hz = /bits/ 64 <408000000>; -+ opp-microvolt = <750000 750000 950000>, -+ <750000 750000 950000>; ++ opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + opp-suspend; + }; + opp-600000000 { + opp-hz = /bits/ 64 <600000000>; -+ opp-microvolt = <750000 750000 950000>, -+ <750000 750000 950000>; ++ opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; + opp-816000000 { + opp-hz = /bits/ 64 <816000000>; -+ opp-microvolt = <750000 750000 950000>, -+ <750000 750000 950000>; ++ opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; + opp-1008000000 { + opp-hz = /bits/ 64 <1008000000>; -+ opp-microvolt = <750000 750000 950000>, -+ <750000 750000 950000>; ++ opp-microvolt = <750000 750000 950000>; + clock-latency-ns = <40000>; + }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; -+ opp-microvolt = <775000 775000 950000>, -+ <775000 775000 950000>; ++ opp-microvolt = <775000 775000 950000>; + clock-latency-ns = <40000>; + }; + opp-1416000000 { + opp-hz = /bits/ 64 <1416000000>; -+ opp-microvolt = <825000 825000 950000>, -+ <825000 825000 950000>; ++ opp-microvolt = <825000 825000 950000>; + clock-latency-ns = <40000>; + }; + opp-1608000000 { + opp-hz = /bits/ 64 <1608000000>; -+ opp-microvolt = <875000 875000 950000>, -+ <875000 875000 950000>; ++ opp-microvolt = <875000 875000 950000>; + clock-latency-ns = <40000>; + }; + opp-1800000000 { + opp-hz = /bits/ 64 <1800000000>; -+ opp-microvolt = <950000 950000 950000>, -+ <950000 950000 950000>; ++ opp-microvolt = <950000 950000 950000>; + clock-latency-ns = <40000>; + }; + }; @@ -4168,63 +20885,38 @@ index 088bbf842506..2f3db2e466db 100644 + + opp-408000000 { + opp-hz = /bits/ 64 <408000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 675000 1000000>; + clock-latency-ns = <40000>; + opp-suspend; + }; -+ opp-600000000 { -+ opp-hz = /bits/ 64 <600000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; -+ opp-816000000 { -+ opp-hz = /bits/ 64 <816000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; -+ opp-1008000000 { -+ opp-hz = /bits/ 64 <1008000000>; -+ opp-microvolt = <625000 625000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; -+ opp-microvolt = <650000 650000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 
675000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1416000000 { + opp-hz = /bits/ 64 <1416000000>; -+ opp-microvolt = <675000 675000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 675000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1608000000 { + opp-hz = /bits/ 64 <1608000000>; -+ opp-microvolt = <700000 700000 1000000>, -+ <700000 700000 1000000>; ++ opp-microvolt = <700000 700000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1800000000 { + opp-hz = /bits/ 64 <1800000000>; -+ opp-microvolt = <775000 775000 1000000>, -+ <775000 775000 1000000>; ++ opp-microvolt = <775000 775000 1000000>; + clock-latency-ns = <40000>; + }; + opp-2016000000 { + opp-hz = /bits/ 64 <2016000000>; -+ opp-microvolt = <850000 850000 1000000>, -+ <850000 850000 1000000>; ++ opp-microvolt = <850000 850000 1000000>; + clock-latency-ns = <40000>; + }; + opp-2208000000 { + opp-hz = /bits/ 64 <2208000000>; -+ opp-microvolt = <925000 925000 1000000>, -+ <925000 925000 1000000>; ++ opp-microvolt = <925000 925000 1000000>; + clock-latency-ns = <40000>; + }; + }; @@ -4245,63 +20937,38 @@ index 088bbf842506..2f3db2e466db 100644 + + opp-408000000 { + opp-hz = /bits/ 64 <408000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 675000 1000000>; + clock-latency-ns = <40000>; + opp-suspend; + }; -+ opp-600000000 { -+ opp-hz = /bits/ 64 <600000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; -+ opp-816000000 { -+ opp-hz = /bits/ 64 <816000000>; -+ opp-microvolt = <600000 600000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; -+ opp-1008000000 { -+ opp-hz = /bits/ 64 <1008000000>; -+ opp-microvolt = <625000 625000 1000000>, -+ <675000 675000 1000000>; -+ clock-latency-ns = <40000>; -+ }; + opp-1200000000 { + opp-hz = /bits/ 64 <1200000000>; -+ opp-microvolt = <650000 650000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 675000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1416000000 { + opp-hz = /bits/ 64 <1416000000>; -+ opp-microvolt = <675000 675000 1000000>, -+ <675000 675000 1000000>; ++ opp-microvolt = <675000 675000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1608000000 { + opp-hz = /bits/ 64 <1608000000>; -+ opp-microvolt = <700000 700000 1000000>, -+ <700000 700000 1000000>; ++ opp-microvolt = <700000 700000 1000000>; + clock-latency-ns = <40000>; + }; + opp-1800000000 { + opp-hz = /bits/ 64 <1800000000>; -+ opp-microvolt = <775000 775000 1000000>, -+ <775000 775000 1000000>; ++ opp-microvolt = <775000 775000 1000000>; + clock-latency-ns = <40000>; + }; + opp-2016000000 { + opp-hz = /bits/ 64 <2016000000>; -+ opp-microvolt = <850000 850000 1000000>, -+ <850000 850000 1000000>; ++ opp-microvolt = <850000 850000 1000000>; + clock-latency-ns = <40000>; + }; + opp-2208000000 { + opp-hz = /bits/ 64 <2208000000>; -+ opp-microvolt = <925000 925000 1000000>, -+ <925000 925000 1000000>; ++ opp-microvolt = <925000 925000 1000000>; + clock-latency-ns = <40000>; + }; + }; @@ -4309,7 +20976,7 @@ index 088bbf842506..2f3db2e466db 100644 cpus { #address-cells = <1>; #size-cells = <0>; -@@ -96,6 +306,7 @@ cpu_l0: cpu@0 { +@@ -96,6 +248,7 @@ cpu_l0: cpu@0 { clocks = <&scmi_clk SCMI_CLK_CPUL>; assigned-clocks = <&scmi_clk SCMI_CLK_CPUL>; assigned-clock-rates = <816000000>; @@ -4317,7 +20984,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <32768>; i-cache-line-size = <64>; -@@ -115,6 
+326,7 @@ cpu_l1: cpu@100 { +@@ -115,6 +268,7 @@ cpu_l1: cpu@100 { enable-method = "psci"; capacity-dmips-mhz = <530>; clocks = <&scmi_clk SCMI_CLK_CPUL>; @@ -4325,7 +20992,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <32768>; i-cache-line-size = <64>; -@@ -134,6 +346,7 @@ cpu_l2: cpu@200 { +@@ -134,6 +288,7 @@ cpu_l2: cpu@200 { enable-method = "psci"; capacity-dmips-mhz = <530>; clocks = <&scmi_clk SCMI_CLK_CPUL>; @@ -4333,7 +21000,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <32768>; i-cache-line-size = <64>; -@@ -153,6 +366,7 @@ cpu_l3: cpu@300 { +@@ -153,6 +308,7 @@ cpu_l3: cpu@300 { enable-method = "psci"; capacity-dmips-mhz = <530>; clocks = <&scmi_clk SCMI_CLK_CPUL>; @@ -4341,7 +21008,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <32768>; i-cache-line-size = <64>; -@@ -174,6 +388,7 @@ cpu_b0: cpu@400 { +@@ -174,6 +330,7 @@ cpu_b0: cpu@400 { clocks = <&scmi_clk SCMI_CLK_CPUB01>; assigned-clocks = <&scmi_clk SCMI_CLK_CPUB01>; assigned-clock-rates = <816000000>; @@ -4349,7 +21016,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <65536>; i-cache-line-size = <64>; -@@ -193,6 +408,7 @@ cpu_b1: cpu@500 { +@@ -193,6 +350,7 @@ cpu_b1: cpu@500 { enable-method = "psci"; capacity-dmips-mhz = <1024>; clocks = <&scmi_clk SCMI_CLK_CPUB01>; @@ -4357,7 +21024,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <65536>; i-cache-line-size = <64>; -@@ -214,6 +430,7 @@ cpu_b2: cpu@600 { +@@ -214,6 +372,7 @@ cpu_b2: cpu@600 { clocks = <&scmi_clk SCMI_CLK_CPUB23>; assigned-clocks = <&scmi_clk SCMI_CLK_CPUB23>; assigned-clock-rates = <816000000>; @@ -4365,7 +21032,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <65536>; i-cache-line-size = <64>; -@@ -233,6 +450,7 @@ cpu_b3: cpu@700 { +@@ -233,6 +392,7 @@ cpu_b3: cpu@700 { enable-method = "psci"; capacity-dmips-mhz = <1024>; clocks = <&scmi_clk SCMI_CLK_CPUB23>; @@ -4373,7 +21040,7 @@ index 088bbf842506..2f3db2e466db 100644 cpu-idle-states = <&CPU_SLEEP>; i-cache-size = <65536>; i-cache-line-size = <64>; -@@ -399,6 +617,230 @@ display_subsystem: display-subsystem { +@@ -399,6 +559,230 @@ display_subsystem: display-subsystem { ports = <&vop_out>; }; @@ -4604,7 +21271,7 @@ index 088bbf842506..2f3db2e466db 100644 timer { compatible = "arm,armv8-timer"; interrupts = , -@@ -554,6 +996,16 @@ usb_grf: syscon@fd5ac000 { +@@ -554,6 +938,16 @@ usb_grf: syscon@fd5ac000 { reg = <0x0 0xfd5ac000 0x0 0x4000>; }; @@ -4625,205 +21292,10 @@ index 088bbf842506..2f3db2e466db 100644 2.42.0 -From 0a6f128324f9057050a8e21b8bd587862257bdd0 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Thu, 25 May 2023 19:48:49 +0200 -Subject: [PATCH 23/81] arm64: dts: rockchip: rk3588-evb1: add cpu mem - regulator info - -Add the second supply regulator for the CPU cores, which is used -for supplying the memory interface. 
- -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 0e82dbb4c5ff..ecbbd553a31e 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -249,34 +249,42 @@ &combphy2_psu { - - &cpu_b0 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_mem_s0>; - }; - - &cpu_b1 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_mem_s0>; - }; - - &cpu_b2 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_mem_s0>; - }; - - &cpu_b3 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_mem_s0>; - }; - - &cpu_l0 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l1 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l2 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l3 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &gmac0 { --- -2.42.0 - - -From 49d47a9f0000a848e5be23a451b6620ac15ca489 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Mon, 24 Jul 2023 15:18:39 +0200 -Subject: [PATCH 24/81] arm64: dts: rockchip: rock5a: add cpu mem regulator - info - -Add the second supply regulator for the CPU cores, which is used -for supplying the memory interface. - -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts -index 149058352f4e..a9ad9d2d54ae 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588s-rock-5a.dts -@@ -120,34 +120,42 @@ &combphy2_psu { - - &cpu_b0 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b1 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b2 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_b3 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_l0 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l1 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l2 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l3 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &i2c0 { --- -2.42.0 - - -From 8195e4af42a7c6dd08ba0a398dbe8a05494aca6b Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Mon, 24 Jul 2023 15:07:49 +0200 -Subject: [PATCH 25/81] arm64: dts: rockchip: rock5b: add cpu mem regulator - info - -Add the second supply regulator for the CPU cores, which is used -for supplying the memory interface. 
- -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index 41d2a0870d9f..6c1f1e867cd0 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -@@ -154,34 +154,42 @@ &combphy2_psu { - - &cpu_b0 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b1 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b2 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_b3 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_l0 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l1 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l2 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &cpu_l3 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_mem_s0>; - }; - - &i2c0 { --- -2.42.0 - - -From 5e8f0518605dcaec3bb80bd7ed59e804f9c67603 Mon Sep 17 00:00:00 2001 +From f0df0da2179af4be21cc5d933122b5f5dfd7558f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 14 Jul 2023 17:38:24 +0200 -Subject: [PATCH 26/81] [BROKEN] arm64: dts: rockchip: rk3588-evb1: add PCIe2 +Subject: [PATCH 49/71] [BROKEN] arm64: dts: rockchip: rk3588-evb1: add PCIe2 WLAN controller Enable PCIe bus used by on-board PCIe Broadcom WLAN controller. @@ -4853,7 +21325,7 @@ Signed-off-by: Sebastian Reichel 1 file changed, 61 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index ecbbd553a31e..2183acb2ff8a 100644 +index f40b3d251f4b..7e22b0e0c754 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -120,6 +120,15 @@ backlight: backlight { @@ -4909,7 +21381,7 @@ index ecbbd553a31e..2183acb2ff8a 100644 &combphy2_psu { status = "okay"; }; -@@ -407,6 +440,12 @@ rgmii_phy: ethernet-phy@1 { +@@ -399,6 +432,12 @@ rgmii_phy: ethernet-phy@1 { }; }; @@ -4922,7 +21394,7 @@ index ecbbd553a31e..2183acb2ff8a 100644 &pcie2x1l1 { reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; -@@ -461,6 +500,18 @@ hym8563_int: hym8563-int { +@@ -453,6 +492,18 @@ hym8563_int: hym8563-int { }; pcie2 { @@ -4941,7 +21413,7 @@ index ecbbd553a31e..2183acb2ff8a 100644 pcie2_1_rst: pcie2-1-rst { rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>; }; -@@ -491,6 +542,16 @@ typec5v_pwren: typec5v-pwren { +@@ -483,6 +534,16 @@ typec5v_pwren: typec5v-pwren { rockchip,pins = <4 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>; }; }; @@ -4962,333 +21434,10 @@ index ecbbd553a31e..2183acb2ff8a 100644 2.42.0 -From b0abcf853c6d97c82e374b4ed9c75d5a47447214 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 13 Dec 2023 18:58:43 +0100 -Subject: [PATCH 27/81] clk: rockchip: rk3588: fix CLK_NR_CLKS usage - -CLK_NR_CLKS is not part of the DT bindings and needs to be removed -from it, just like it recently happened for other platforms. This -takes care of it by introducing a new function identifying the -maximum used clock ID at runtime. 
- -Signed-off-by: Sebastian Reichel ---- - drivers/clk/rockchip/clk-rk3588.c | 5 ++++- - drivers/clk/rockchip/clk.c | 17 +++++++++++++++++ - drivers/clk/rockchip/clk.h | 2 ++ - 3 files changed, 23 insertions(+), 1 deletion(-) - -diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c -index 6994165e0395..0b60ae78f9d8 100644 ---- a/drivers/clk/rockchip/clk-rk3588.c -+++ b/drivers/clk/rockchip/clk-rk3588.c -@@ -2458,15 +2458,18 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - static void __init rk3588_clk_init(struct device_node *np) - { - struct rockchip_clk_provider *ctx; -+ unsigned long clk_nr_clks; - void __iomem *reg_base; - -+ clk_nr_clks = rockchip_clk_find_max_clk_id(rk3588_clk_branches, -+ ARRAY_SIZE(rk3588_clk_branches)) + 1; - reg_base = of_iomap(np, 0); - if (!reg_base) { - pr_err("%s: could not map cru region\n", __func__); - return; - } - -- ctx = rockchip_clk_init(np, reg_base, CLK_NR_CLKS); -+ ctx = rockchip_clk_init(np, reg_base, clk_nr_clks); - if (IS_ERR(ctx)) { - pr_err("%s: rockchip clk init failed\n", __func__); - iounmap(reg_base); -diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c -index 4059d9365ae6..73d2cbdc716b 100644 ---- a/drivers/clk/rockchip/clk.c -+++ b/drivers/clk/rockchip/clk.c -@@ -429,6 +429,23 @@ void rockchip_clk_register_plls(struct rockchip_clk_provider *ctx, - } - EXPORT_SYMBOL_GPL(rockchip_clk_register_plls); - -+unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list, -+ unsigned int nr_clk) -+{ -+ unsigned long max = 0; -+ unsigned int idx; -+ -+ for (idx = 0; idx < nr_clk; idx++, list++) { -+ if (list->id > max) -+ max = list->id; -+ if (list->child && list->child->id > max) -+ max = list->id; -+ } -+ -+ return max; -+} -+EXPORT_SYMBOL_GPL(rockchip_clk_find_max_clk_id); -+ - void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, - struct rockchip_clk_branch *list, - unsigned int nr_clk) -diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h -index 758ebaf2236b..fd3b476dedda 100644 ---- a/drivers/clk/rockchip/clk.h -+++ b/drivers/clk/rockchip/clk.h -@@ -973,6 +973,8 @@ struct rockchip_clk_provider *rockchip_clk_init(struct device_node *np, - void __iomem *base, unsigned long nr_clks); - void rockchip_clk_of_add_provider(struct device_node *np, - struct rockchip_clk_provider *ctx); -+unsigned long rockchip_clk_find_max_clk_id(struct rockchip_clk_branch *list, -+ unsigned int nr_clk); - void rockchip_clk_register_branches(struct rockchip_clk_provider *ctx, - struct rockchip_clk_branch *list, - unsigned int nr_clk); --- -2.42.0 - - -From 9dafc0bbf4718c43cba9d77fc3de604d8d54c4d8 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 13 Dec 2023 18:57:31 +0100 -Subject: [PATCH 28/81] dt-bindings: clock: rk3588: drop CLK_NR_CLKS - -CLK_NR_CLKS should not be part of the binding. Let's drop it, since -the kernel code no longer uses it either. 
- -Reviewed-by: Krzysztof Kozlowski -Signed-off-by: Sebastian Reichel ---- - include/dt-bindings/clock/rockchip,rk3588-cru.h | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/include/dt-bindings/clock/rockchip,rk3588-cru.h b/include/dt-bindings/clock/rockchip,rk3588-cru.h -index 5790b1391201..7c6f0ec7c979 100644 ---- a/include/dt-bindings/clock/rockchip,rk3588-cru.h -+++ b/include/dt-bindings/clock/rockchip,rk3588-cru.h -@@ -734,8 +734,6 @@ - #define PCLK_AV1_PRE 719 - #define HCLK_SDIO_PRE 720 - --#define CLK_NR_CLKS (HCLK_SDIO_PRE + 1) -- - /* scmi-clocks indices */ - - #define SCMI_CLK_CPUL 0 --- -2.42.0 - - -From 2c538921df70e390db8439a71935ccd7c8e14bd0 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 13 Dec 2023 19:02:57 +0100 -Subject: [PATCH 29/81] dt-bindings: clock: rk3588: add missing PCLK_VO1GRF - -Add PCLK_VO1GRF to complement PCLK_VO0GRF. This will be needed -for HDMI support. - -Acked-by: Krzysztof Kozlowski -Signed-off-by: Sebastian Reichel ---- - include/dt-bindings/clock/rockchip,rk3588-cru.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/include/dt-bindings/clock/rockchip,rk3588-cru.h b/include/dt-bindings/clock/rockchip,rk3588-cru.h -index 7c6f0ec7c979..0c7d3ca2d5bc 100644 ---- a/include/dt-bindings/clock/rockchip,rk3588-cru.h -+++ b/include/dt-bindings/clock/rockchip,rk3588-cru.h -@@ -733,6 +733,7 @@ - #define ACLK_AV1_PRE 718 - #define PCLK_AV1_PRE 719 - #define HCLK_SDIO_PRE 720 -+#define PCLK_VO1GRF 721 - - /* scmi-clocks indices */ - --- -2.42.0 - - -From fd8584d10d04d0c719fc38af76aaafe924ba5560 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 13 Dec 2023 19:06:30 +0100 -Subject: [PATCH 30/81] clk: rockchip: rk3588: fix pclk_vo0grf and pclk_vo1grf - -Currently pclk_vo1grf is not exposed, but it should be referenced -from the vo1_grf syscon, which needs it enabled. That syscon is -required for HDMI RX and TX functionality among other things. - -Apart from that pclk_vo0grf and pclk_vo1grf are both linked gates -and need the VO's hclk enabled in addition to their parent clock. - -No Fixes tag has been added, since the logic requiring these clocks -is not yet upstream anyways. 
- -Signed-off-by: Sebastian Reichel ---- - drivers/clk/rockchip/clk-rk3588.c | 10 ++++------ - 1 file changed, 4 insertions(+), 6 deletions(-) - -diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c -index 0b60ae78f9d8..26330d655159 100644 ---- a/drivers/clk/rockchip/clk-rk3588.c -+++ b/drivers/clk/rockchip/clk-rk3588.c -@@ -1851,8 +1851,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - RK3588_CLKGATE_CON(56), 0, GFLAGS), - GATE(PCLK_TRNG0, "pclk_trng0", "pclk_vo0_root", 0, - RK3588_CLKGATE_CON(56), 1, GFLAGS), -- GATE(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", CLK_IGNORE_UNUSED, -- RK3588_CLKGATE_CON(55), 10, GFLAGS), - COMPOSITE(CLK_I2S4_8CH_TX_SRC, "clk_i2s4_8ch_tx_src", gpll_aupll_p, 0, - RK3588_CLKSEL_CON(118), 5, 1, MFLAGS, 0, 5, DFLAGS, - RK3588_CLKGATE_CON(56), 11, GFLAGS), -@@ -1998,8 +1996,6 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - RK3588_CLKGATE_CON(60), 9, GFLAGS), - GATE(PCLK_TRNG1, "pclk_trng1", "pclk_vo1_root", 0, - RK3588_CLKGATE_CON(60), 10, GFLAGS), -- GATE(0, "pclk_vo1grf", "pclk_vo1_root", CLK_IGNORE_UNUSED, -- RK3588_CLKGATE_CON(59), 12, GFLAGS), - GATE(PCLK_S_EDP0, "pclk_s_edp0", "pclk_vo1_s_root", 0, - RK3588_CLKGATE_CON(59), 14, GFLAGS), - GATE(PCLK_S_EDP1, "pclk_s_edp1", "pclk_vo1_s_root", 0, -@@ -2447,12 +2443,14 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), - GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), - GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", "aclk_vop_low_root", 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), -- GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", 0, RK3588_CLKGATE_CON(55), 5, GFLAGS), -+ GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), - GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), -- GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 9, GFLAGS), -+ GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), - GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 1, GFLAGS), - GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), - GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", "hclk_nvm", 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), -+ GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", "hclk_vo0", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), -+ GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", "hclk_vo1", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), - }; - - static void __init rk3588_clk_init(struct device_node *np) --- -2.42.0 - - -From 0334dac5cbdde53c75ae94c2fee451a32a5947b1 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Tue, 21 Nov 2023 17:52:54 +0100 -Subject: [PATCH 31/81] clk: rockchip: rk3588: fix indent - -pclk_mailbox2 is the only RK3588 clock indented with one tab instead of -two tabs. Let's fix this. 
- -Signed-off-by: Sebastian Reichel ---- - drivers/clk/rockchip/clk-rk3588.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c -index 26330d655159..2e8bdd93c625 100644 ---- a/drivers/clk/rockchip/clk-rk3588.c -+++ b/drivers/clk/rockchip/clk-rk3588.c -@@ -1004,7 +1004,7 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - GATE(PCLK_MAILBOX1, "pclk_mailbox1", "pclk_top_root", 0, - RK3588_CLKGATE_CON(16), 12, GFLAGS), - GATE(PCLK_MAILBOX2, "pclk_mailbox2", "pclk_top_root", 0, -- RK3588_CLKGATE_CON(16), 13, GFLAGS), -+ RK3588_CLKGATE_CON(16), 13, GFLAGS), - GATE(PCLK_PMU2, "pclk_pmu2", "pclk_top_root", CLK_IS_CRITICAL, - RK3588_CLKGATE_CON(19), 3, GFLAGS), - GATE(PCLK_PMUCM0_INTMUX, "pclk_pmucm0_intmux", "pclk_top_root", CLK_IS_CRITICAL, --- -2.42.0 - - -From b97929e34878e635bf4e5b901d8d1c3055d7b8a1 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 22 Nov 2023 19:23:13 +0100 -Subject: [PATCH 32/81] clk: rockchip: rk3588: use linked clock ID for - GATE_LINK - -In preparation for properly supporting GATE_LINK switch the unused -linked clock argument from the clock's name to its ID. This allows -easy and fast lookup of the 'struct clk'. - -Signed-off-by: Sebastian Reichel ---- - drivers/clk/rockchip/clk-rk3588.c | 46 +++++++++++++++---------------- - 1 file changed, 23 insertions(+), 23 deletions(-) - -diff --git a/drivers/clk/rockchip/clk-rk3588.c b/drivers/clk/rockchip/clk-rk3588.c -index 2e8bdd93c625..b30279a96dc8 100644 ---- a/drivers/clk/rockchip/clk-rk3588.c -+++ b/drivers/clk/rockchip/clk-rk3588.c -@@ -29,7 +29,7 @@ - * power, but avoids leaking implementation details into DT or hanging the - * system. - */ --#define GATE_LINK(_id, cname, pname, linkname, f, o, b, gf) \ -+#define GATE_LINK(_id, cname, pname, linkedclk, f, o, b, gf) \ - GATE(_id, cname, pname, f, o, b, gf) - #define RK3588_LINKED_CLK CLK_IS_CRITICAL - -@@ -2429,28 +2429,28 @@ static struct rockchip_clk_branch rk3588_clk_branches[] __initdata = { - GATE(ACLK_AV1, "aclk_av1", "aclk_av1_pre", 0, - RK3588_CLKGATE_CON(68), 2, GFLAGS), - -- GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", "aclk_vi_root", 0, RK3588_CLKGATE_CON(26), 6, GFLAGS), -- GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", "hclk_vi_root", 0, RK3588_CLKGATE_CON(26), 8, GFLAGS), -- GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", "aclk_nvm_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS), -- GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 2, GFLAGS), -- GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", "hclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(42), 3, GFLAGS), -- GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 7, GFLAGS), -- GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(44), 5, GFLAGS), -- GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", "aclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 3, GFLAGS), -- GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", "hclk_rkvenc0", 0, RK3588_CLKGATE_CON(48), 2, GFLAGS), -- GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 5, GFLAGS), -- GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(40), 6, GFLAGS), -- 
GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), -- GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), -- GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", "aclk_vop_low_root", 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), -- GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", "hclk_vop_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), -- GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", "aclk_vo1usb_top_root", 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), -- GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", "hclk_vo1usb_top_root", RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), -- GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", "aclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 1, GFLAGS), -- GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", "hclk_vdpu_root", 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), -- GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", "hclk_nvm", 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), -- GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", "hclk_vo0", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), -- GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", "hclk_vo1", CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), -+ GATE_LINK(ACLK_ISP1_PRE, "aclk_isp1_pre", "aclk_isp1_root", ACLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 6, GFLAGS), -+ GATE_LINK(HCLK_ISP1_PRE, "hclk_isp1_pre", "hclk_isp1_root", HCLK_VI_ROOT, 0, RK3588_CLKGATE_CON(26), 8, GFLAGS), -+ GATE_LINK(HCLK_NVM, "hclk_nvm", "hclk_nvm_root", ACLK_NVM_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(31), 2, GFLAGS), -+ GATE_LINK(ACLK_USB, "aclk_usb", "aclk_usb_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 2, GFLAGS), -+ GATE_LINK(HCLK_USB, "hclk_usb", "hclk_usb_root", HCLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(42), 3, GFLAGS), -+ GATE_LINK(ACLK_JPEG_DECODER_PRE, "aclk_jpeg_decoder_pre", "aclk_jpeg_decoder_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 7, GFLAGS), -+ GATE_LINK(ACLK_VDPU_LOW_PRE, "aclk_vdpu_low_pre", "aclk_vdpu_low_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(44), 5, GFLAGS), -+ GATE_LINK(ACLK_RKVENC1_PRE, "aclk_rkvenc1_pre", "aclk_rkvenc1_root", ACLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 3, GFLAGS), -+ GATE_LINK(HCLK_RKVENC1_PRE, "hclk_rkvenc1_pre", "hclk_rkvenc1_root", HCLK_RKVENC0, 0, RK3588_CLKGATE_CON(48), 2, GFLAGS), -+ GATE_LINK(HCLK_RKVDEC0_PRE, "hclk_rkvdec0_pre", "hclk_rkvdec0_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 5, GFLAGS), -+ GATE_LINK(ACLK_RKVDEC0_PRE, "aclk_rkvdec0_pre", "aclk_rkvdec0_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(40), 6, GFLAGS), -+ GATE_LINK(HCLK_RKVDEC1_PRE, "hclk_rkvdec1_pre", "hclk_rkvdec1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 4, GFLAGS), -+ GATE_LINK(ACLK_RKVDEC1_PRE, "aclk_rkvdec1_pre", "aclk_rkvdec1_root", ACLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(41), 5, GFLAGS), -+ GATE_LINK(ACLK_HDCP0_PRE, "aclk_hdcp0_pre", "aclk_vo0_root", ACLK_VOP_LOW_ROOT, 0, RK3588_CLKGATE_CON(55), 9, GFLAGS), -+ GATE_LINK(HCLK_VO0, "hclk_vo0", "hclk_vo0_root", HCLK_VOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(55), 5, GFLAGS), -+ GATE_LINK(ACLK_HDCP1_PRE, "aclk_hdcp1_pre", "aclk_hdcp1_root", ACLK_VO1USB_TOP_ROOT, 0, RK3588_CLKGATE_CON(59), 6, GFLAGS), -+ GATE_LINK(HCLK_VO1, "hclk_vo1", "hclk_vo1_root", HCLK_VO1USB_TOP_ROOT, RK3588_LINKED_CLK, RK3588_CLKGATE_CON(59), 9, GFLAGS), -+ GATE_LINK(ACLK_AV1_PRE, "aclk_av1_pre", "aclk_av1_root", ACLK_VDPU_ROOT, 0, 
RK3588_CLKGATE_CON(68), 1, GFLAGS), -+ GATE_LINK(PCLK_AV1_PRE, "pclk_av1_pre", "pclk_av1_root", HCLK_VDPU_ROOT, 0, RK3588_CLKGATE_CON(68), 4, GFLAGS), -+ GATE_LINK(HCLK_SDIO_PRE, "hclk_sdio_pre", "hclk_sdio_root", HCLK_NVM, 0, RK3588_CLKGATE_CON(75), 1, GFLAGS), -+ GATE_LINK(PCLK_VO0GRF, "pclk_vo0grf", "pclk_vo0_root", HCLK_VO0, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(55), 10, GFLAGS), -+ GATE_LINK(PCLK_VO1GRF, "pclk_vo1grf", "pclk_vo1_root", HCLK_VO1, CLK_IGNORE_UNUSED, RK3588_CLKGATE_CON(59), 12, GFLAGS), - }; - - static void __init rk3588_clk_init(struct device_node *np) --- -2.42.0 - - -From 8016b49a056187298c104908f4b8f1752d24b53e Mon Sep 17 00:00:00 2001 +From d717e20e2c3f9f8a1b363335dafb7a1a42c820c0 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 23 Nov 2023 17:58:21 +0100 -Subject: [PATCH 33/81] clk: rockchip: implement proper GATE_LINK support +Subject: [PATCH 50/71] clk: rockchip: implement proper GATE_LINK support Recent Rockchip SoCs have a new hardware block called Native Interface Unit (NIU), which gates clocks to devices behind them. These effectively @@ -5716,40 +21865,10 @@ index fd3b476dedda..0d8e729fe332 100644 2.42.0 -From 2ccfe13e5006a036ff3483c52823980b7f9d183c Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Tue, 2 Jan 2024 09:33:28 +0100 -Subject: [PATCH 34/81] arm64: dts: rockchip: rk3588-evb1: mark system power - controller - -Mark the primary PMIC as system-power-controller, so that the -system properly shuts down on poweroff. - -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 2183acb2ff8a..b0e714127905 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -590,6 +590,7 @@ pmic@0 { - <&rk806_dvs2_null>, <&rk806_dvs3_null>; - pinctrl-names = "default"; - spi-max-frequency = <1000000>; -+ system-power-controller; - - vcc1-supply = <&vcc5v0_sys>; - vcc2-supply = <&vcc5v0_sys>; --- -2.42.0 - - -From c779a5796a88423d7a34c072d821cc253dd62473 Mon Sep 17 00:00:00 2001 +From ff4320f73a91929eac1b01077763827f6a78d602 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 2 Jan 2024 09:35:43 +0100 -Subject: [PATCH 35/81] arm64: dts: rockchip: rk3588-evb1: add bluetooth rfkill +Subject: [PATCH 51/71] arm64: dts: rockchip: rk3588-evb1: add bluetooth rfkill Add rfkill support for bluetooth. Bluetooth support itself is still missing, but this ensures bluetooth can be powered off properly. 
@@ -5760,7 +21879,7 @@ Signed-off-by: Sebastian Reichel 1 file changed, 15 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index b0e714127905..a40ccd7ce5b5 100644 +index 7e22b0e0c754..105f686d8e3a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -120,6 +120,15 @@ backlight: backlight { @@ -5779,7 +21898,7 @@ index b0e714127905..a40ccd7ce5b5 100644 wlan-rfkill { compatible = "rfkill-gpio"; label = "rfkill-pcie-wlan"; -@@ -480,6 +489,12 @@ speaker_amplifier_en: speaker-amplifier-en { +@@ -472,6 +481,12 @@ speaker_amplifier_en: speaker-amplifier-en { }; }; @@ -5796,10 +21915,10 @@ index b0e714127905..a40ccd7ce5b5 100644 2.42.0 -From 39400add3746fecc8db19aa616a51cedb84e5cdb Mon Sep 17 00:00:00 2001 +From 9a07d4edb2db8fc47b23cd622082b6632f6e6d73 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 2 Jan 2024 09:39:11 +0100 -Subject: [PATCH 36/81] arm64: dts: rockchip: rk3588-evb1: improve PCIe +Subject: [PATCH 52/71] arm64: dts: rockchip: rk3588-evb1: improve PCIe ethernet pin muxing Also describe clkreq and wake signals in the PCIe pinmux used @@ -5811,10 +21930,10 @@ Signed-off-by: Sebastian Reichel 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index a40ccd7ce5b5..5bd9999fcaf9 100644 +index 105f686d8e3a..579ce6b6b5ff 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -458,7 +458,7 @@ &pcie2x1l0 { +@@ -450,7 +450,7 @@ &pcie2x1l0 { &pcie2x1l1 { reset-gpios = <&gpio4 RK_PA2 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; @@ -5823,7 +21942,7 @@ index a40ccd7ce5b5..5bd9999fcaf9 100644 status = "okay"; }; -@@ -530,6 +530,14 @@ pcie2_0_clkreq: pcie2-0-clkreq { +@@ -522,6 +522,14 @@ pcie2_0_clkreq: pcie2-0-clkreq { pcie2_1_rst: pcie2-1-rst { rockchip,pins = <4 RK_PA2 RK_FUNC_GPIO &pcfg_pull_none>; }; @@ -5842,10 +21961,10 @@ index a40ccd7ce5b5..5bd9999fcaf9 100644 2.42.0 -From 6d8e35990901d44da898341a080bfe45e6790980 Mon Sep 17 00:00:00 2001 +From 405ee230037850a61866bcc5d47210f883a4f9ac Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Nov 2023 19:58:02 +0200 -Subject: [PATCH 37/81] [WIP] drm/rockchip: vop2: Improve display modes +Subject: [PATCH 53/71] [WIP] drm/rockchip: vop2: Improve display modes handling on rk3588 The initial vop2 support for rk3588 in mainline is not able to handle @@ -5864,7 +21983,7 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 552 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c -index 85b3b4871a1d..df54a1b974bd 100644 +index fdd768bbd487..c1361ceaec41 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -5,6 +5,8 @@ @@ -6075,7 +22194,7 @@ index 85b3b4871a1d..df54a1b974bd 100644 drm_dbg(vop2->drm, "Update mode to %dx%d%s%d, type: %d for vp%d\n", hdisplay, vdisplay, mode->flags & DRM_MODE_FLAG_INTERLACE ? 
"i" : "p", -@@ -2040,11 +2189,38 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, +@@ -2042,11 +2191,38 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, if (mode->flags & DRM_MODE_FLAG_DBLCLK) { dsp_ctrl |= RK3568_VP_DSP_CTRL__CORE_DCLK_DIV; @@ -6115,7 +22234,7 @@ index 85b3b4871a1d..df54a1b974bd 100644 clk_set_rate(vp->dclk, clock); vop2_post_config(crtc); -@@ -2500,7 +2676,43 @@ static void vop2_crtc_atomic_flush(struct drm_crtc *crtc, +@@ -2502,7 +2678,43 @@ static void vop2_crtc_atomic_flush(struct drm_crtc *crtc, spin_unlock_irq(&crtc->dev->event_lock); } @@ -6159,7 +22278,7 @@ index 85b3b4871a1d..df54a1b974bd 100644 .mode_fixup = vop2_crtc_mode_fixup, .atomic_check = vop2_crtc_atomic_check, .atomic_begin = vop2_crtc_atomic_begin, -@@ -3070,6 +3282,336 @@ static const struct regmap_config vop2_regmap_config = { +@@ -3072,6 +3284,336 @@ static const struct regmap_config vop2_regmap_config = { .cache_type = REGCACHE_MAPLE, }; @@ -6496,7 +22615,7 @@ index 85b3b4871a1d..df54a1b974bd 100644 static int vop2_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); -@@ -3163,6 +3705,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) +@@ -3165,6 +3707,12 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(vop2->pclk); } @@ -6509,7 +22628,7 @@ index 85b3b4871a1d..df54a1b974bd 100644 vop2->irq = platform_get_irq(pdev, 0); if (vop2->irq < 0) { drm_err(vop2->drm, "cannot find irq for vop2\n"); -@@ -3179,6 +3727,9 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) +@@ -3181,6 +3729,9 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) if (ret) return ret; @@ -6523,2264 +22642,10 @@ index 85b3b4871a1d..df54a1b974bd 100644 2.42.0 -From efbd40f789ba143cb0a9fbeea9fddcfdf8e671cf Mon Sep 17 00:00:00 2001 -From: Cristian Ciocaltea -Date: Tue, 16 Jan 2024 21:30:24 +0200 -Subject: [PATCH 38/81] dt-bindings: soc: rockchip: Add rk3588 hdptxphy syscon - -Add compatible for the hdptxphy GRF used by rk3588-hdptx-phy. - -Signed-off-by: Cristian Ciocaltea ---- - Documentation/devicetree/bindings/soc/rockchip/grf.yaml | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -index 12e7a78f7f6b..0b87c266760c 100644 ---- a/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -+++ b/Documentation/devicetree/bindings/soc/rockchip/grf.yaml -@@ -22,6 +22,7 @@ properties: - - rockchip,rk3568-usb2phy-grf - - rockchip,rk3588-bigcore0-grf - - rockchip,rk3588-bigcore1-grf -+ - rockchip,rk3588-hdptxphy-grf - - rockchip,rk3588-ioc - - rockchip,rk3588-php-grf - - rockchip,rk3588-pipe-phy-grf --- -2.42.0 - - -From c4355d0d610b81d21649de60e3d524e7e16c77e8 Mon Sep 17 00:00:00 2001 -From: Cristian Ciocaltea -Date: Tue, 16 Jan 2024 23:14:27 +0200 -Subject: [PATCH 39/81] dt-bindings: phy: Add Rockchip HDMI/DP Combo PHY schema - -Add dt-binding schema for the Rockchip HDMI/DP Transmitter Combo PHY -found on RK3588 SoC. 
- -Signed-off-by: Cristian Ciocaltea ---- - .../phy/rockchip,rk3588-hdptx-phy.yaml | 96 +++++++++++++++++++ - 1 file changed, 96 insertions(+) - create mode 100644 Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml - -diff --git a/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml -new file mode 100644 -index 000000000000..dd357994ba1b ---- /dev/null -+++ b/Documentation/devicetree/bindings/phy/rockchip,rk3588-hdptx-phy.yaml -@@ -0,0 +1,96 @@ -+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/phy/rockchip,rk3588-hdptx-phy.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: Rockchip SoC HDMI/DP Transmitter Combo PHY -+ -+maintainers: -+ - Cristian Ciocaltea -+ -+properties: -+ compatible: -+ enum: -+ - rockchip,rk3588-hdptx-phy -+ -+ reg: -+ maxItems: 1 -+ -+ clocks: -+ items: -+ - description: Reference clock -+ - description: APB clock -+ -+ clock-names: -+ items: -+ - const: ref -+ - const: apb -+ -+ "#phy-cells": -+ const: 0 -+ -+ resets: -+ items: -+ - description: PHY reset line -+ - description: APB reset line -+ - description: INIT reset line -+ - description: CMN reset line -+ - description: LANE reset line -+ - description: ROPLL reset line -+ - description: LCPLL reset line -+ -+ reset-names: -+ items: -+ - const: phy -+ - const: apb -+ - const: init -+ - const: cmn -+ - const: lane -+ - const: ropll -+ - const: lcpll -+ -+ rockchip,grf: -+ $ref: /schemas/types.yaml#/definitions/phandle -+ description: Some PHY related data is accessed through GRF regs. -+ -+required: -+ - compatible -+ - reg -+ - clocks -+ - clock-names -+ - "#phy-cells" -+ - resets -+ - reset-names -+ - rockchip,grf -+ -+additionalProperties: false -+ -+examples: -+ - | -+ #include -+ #include -+ -+ soc { -+ #address-cells = <2>; -+ #size-cells = <2>; -+ -+ hdptxphy_grf: syscon@fd5e0000 { -+ compatible = "rockchip,rk3588-hdptxphy-grf", "syscon"; -+ reg = <0x0 0xfd5e0000 0x0 0x100>; -+ }; -+ -+ hdptxphy: phy@fed60000 { -+ compatible = "rockchip,rk3588-hdptx-phy"; -+ reg = <0x0 0xfed60000 0x0 0x2000>; -+ clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>; -+ clock-names = "ref", "apb"; -+ #phy-cells = <0>; -+ resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>, -+ <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>, -+ <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>, -+ <&cru SRST_HDPTX0_LCPLL>; -+ reset-names = "phy", "apb", "init", "cmn", "lane", "ropll", "lcpll"; -+ rockchip,grf = <&hdptxphy_grf>; -+ }; -+ }; --- -2.42.0 - - -From a0973439516df2f9f2e912b59070391d5e8ceb16 Mon Sep 17 00:00:00 2001 -From: Cristian Ciocaltea -Date: Thu, 14 Sep 2023 19:10:30 +0300 -Subject: [PATCH 40/81] phy: rockchip: Add Samsung HDMI/DP Combo PHY driver - -Add driver for the Rockchip HDMI TX/eDP Combo PHY found on RK3588 SoC. - -The PHY is based on a Samsung IP block and supports HDMI 2.1 TMDS, FRL -and eDP links. The maximum data rate is 12Gbps (HDMI 2.1 FRL), while -the minimum is 250Mbps (HDMI 2.1 TMDS). 
- -Co-developed-by: Algea Cao -Signed-off-by: Algea Cao -Signed-off-by: Cristian Ciocaltea ---- - drivers/phy/rockchip/Kconfig | 8 + - drivers/phy/rockchip/Makefile | 1 + - .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 2044 +++++++++++++++++ - 3 files changed, 2053 insertions(+) - create mode 100644 drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c - -diff --git a/drivers/phy/rockchip/Kconfig b/drivers/phy/rockchip/Kconfig -index d21b458c1d18..26c502ecc5e0 100644 ---- a/drivers/phy/rockchip/Kconfig -+++ b/drivers/phy/rockchip/Kconfig -@@ -83,6 +83,14 @@ config PHY_ROCKCHIP_PCIE - help - Enable this to support the Rockchip PCIe PHY. - -+config PHY_ROCKCHIP_SAMSUNG_HDPTX -+ tristate "Rockchip Samsung HDMI/DP Combo PHY driver" -+ depends on (ARCH_ROCKCHIP || COMPILE_TEST) && OF -+ select GENERIC_PHY -+ help -+ Enable this to support the Rockchip HDMI/DP Combo PHY -+ with Samsung IP block. -+ - config PHY_ROCKCHIP_SNPS_PCIE3 - tristate "Rockchip Snps PCIe3 PHY Driver" - depends on (ARCH_ROCKCHIP && OF) || COMPILE_TEST -diff --git a/drivers/phy/rockchip/Makefile b/drivers/phy/rockchip/Makefile -index 25d2e1355db7..010a824e32ce 100644 ---- a/drivers/phy/rockchip/Makefile -+++ b/drivers/phy/rockchip/Makefile -@@ -8,6 +8,7 @@ obj-$(CONFIG_PHY_ROCKCHIP_INNO_HDMI) += phy-rockchip-inno-hdmi.o - obj-$(CONFIG_PHY_ROCKCHIP_INNO_USB2) += phy-rockchip-inno-usb2.o - obj-$(CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY) += phy-rockchip-naneng-combphy.o - obj-$(CONFIG_PHY_ROCKCHIP_PCIE) += phy-rockchip-pcie.o -+obj-$(CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX) += phy-rockchip-samsung-hdptx.o - obj-$(CONFIG_PHY_ROCKCHIP_SNPS_PCIE3) += phy-rockchip-snps-pcie3.o - obj-$(CONFIG_PHY_ROCKCHIP_TYPEC) += phy-rockchip-typec.o - obj-$(CONFIG_PHY_ROCKCHIP_USB) += phy-rockchip-usb.o -diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c -new file mode 100644 -index 000000000000..eece301c646f ---- /dev/null -+++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c -@@ -0,0 +1,2044 @@ -+// SPDX-License-Identifier: GPL-2.0+ -+/* -+ * Copyright (c) 2021-2022 Rockchip Electronics Co., Ltd. -+ * Copyright (c) 2024 Collabora Ltd. 
-+ * -+ * Author: Algea Cao -+ * Author: Cristian Ciocaltea -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define UPDATE(x, h, l) (((x) << (l)) & GENMASK((h), (l))) -+ -+#define GRF_HDPTX_CON0 0x00 -+#define HDPTX_I_PLL_EN BIT(7) -+#define HDPTX_I_BIAS_EN BIT(6) -+#define HDPTX_I_BGR_EN BIT(5) -+#define GRF_HDPTX_STATUS 0x80 -+#define HDPTX_O_PLL_LOCK_DONE BIT(3) -+#define HDPTX_O_PHY_CLK_RDY BIT(2) -+#define HDPTX_O_PHY_RDY BIT(1) -+#define HDPTX_O_SB_RDY BIT(0) -+ -+#define CMN_REG0000 0x0000 -+#define CMN_REG0001 0x0004 -+#define CMN_REG0002 0x0008 -+#define CMN_REG0003 0x000C -+#define CMN_REG0004 0x0010 -+#define CMN_REG0005 0x0014 -+#define CMN_REG0006 0x0018 -+#define CMN_REG0007 0x001C -+#define CMN_REG0008 0x0020 -+#define LCPLL_EN_MASK BIT(6) -+#define LCPLL_EN(x) UPDATE(x, 4, 4) -+#define LCPLL_LCVCO_MODE_EN_MASK BIT(4) -+#define LCPLL_LCVCO_MODE_EN(x) UPDATE(x, 4, 4) -+#define CMN_REG0009 0x0024 -+#define CMN_REG000A 0x0028 -+#define CMN_REG000B 0x002C -+#define CMN_REG000C 0x0030 -+#define CMN_REG000D 0x0034 -+#define CMN_REG000E 0x0038 -+#define CMN_REG000F 0x003C -+#define CMN_REG0010 0x0040 -+#define CMN_REG0011 0x0044 -+#define CMN_REG0012 0x0048 -+#define CMN_REG0013 0x004C -+#define CMN_REG0014 0x0050 -+#define CMN_REG0015 0x0054 -+#define CMN_REG0016 0x0058 -+#define CMN_REG0017 0x005C -+#define CMN_REG0018 0x0060 -+#define CMN_REG0019 0x0064 -+#define CMN_REG001A 0x0068 -+#define CMN_REG001B 0x006C -+#define CMN_REG001C 0x0070 -+#define CMN_REG001D 0x0074 -+#define CMN_REG001E 0x0078 -+#define LCPLL_PI_EN_MASK BIT(5) -+#define LCPLL_PI_EN(x) UPDATE(x, 5, 5) -+#define LCPLL_100M_CLK_EN_MASK BIT(0) -+#define LCPLL_100M_CLK_EN(x) UPDATE(x, 0, 0) -+#define CMN_REG001F 0x007C -+#define CMN_REG0020 0x0080 -+#define CMN_REG0021 0x0084 -+#define CMN_REG0022 0x0088 -+#define CMN_REG0023 0x008C -+#define CMN_REG0024 0x0090 -+#define CMN_REG0025 0x0094 -+#define LCPLL_PMS_IQDIV_RSTN BIT(4) -+#define CMN_REG0026 0x0098 -+#define CMN_REG0027 0x009C -+#define CMN_REG0028 0x00A0 -+#define LCPLL_SDC_FRAC_EN BIT(2) -+#define LCPLL_SDC_FRAC_RSTN BIT(0) -+#define CMN_REG0029 0x00A4 -+#define CMN_REG002A 0x00A8 -+#define CMN_REG002B 0x00AC -+#define CMN_REG002C 0x00B0 -+#define CMN_REG002D 0x00B4 -+#define LCPLL_SDC_N_MASK GENMASK(3, 1) -+#define LCPLL_SDC_N(x) UPDATE(x, 3, 1) -+#define CMN_REG002E 0x00B8 -+#define LCPLL_SDC_NUMBERATOR_MASK GENMASK(5, 0) -+#define LCPLL_SDC_NUMBERATOR(x) UPDATE(x, 5, 0) -+#define CMN_REG002F 0x00BC -+#define LCPLL_SDC_DENOMINATOR_MASK GENMASK(7, 2) -+#define LCPLL_SDC_DENOMINATOR(x) UPDATE(x, 7, 2) -+#define LCPLL_SDC_NDIV_RSTN BIT(0) -+#define CMN_REG0030 0x00C0 -+#define CMN_REG0031 0x00C4 -+#define CMN_REG0032 0x00C8 -+#define CMN_REG0033 0x00CC -+#define CMN_REG0034 0x00D0 -+#define CMN_REG0035 0x00D4 -+#define CMN_REG0036 0x00D8 -+#define CMN_REG0037 0x00DC -+#define CMN_REG0038 0x00E0 -+#define CMN_REG0039 0x00E4 -+#define CMN_REG003A 0x00E8 -+#define CMN_REG003B 0x00EC -+#define CMN_REG003C 0x00F0 -+#define CMN_REG003D 0x00F4 -+#define ROPLL_LCVCO_EN BIT(4) -+#define CMN_REG003E 0x00F8 -+#define CMN_REG003F 0x00FC -+#define CMN_REG0040 0x0100 -+#define CMN_REG0041 0x0104 -+#define CMN_REG0042 0x0108 -+#define CMN_REG0043 0x010C -+#define CMN_REG0044 0x0110 -+#define CMN_REG0045 0x0114 -+#define CMN_REG0046 0x0118 -+#define CMN_REG0047 0x011C -+#define CMN_REG0048 0x0120 -+#define CMN_REG0049 0x0124 -+#define CMN_REG004A 0x0128 -+#define CMN_REG004B 0x012C 
-+#define CMN_REG004C 0x0130 -+#define CMN_REG004D 0x0134 -+#define CMN_REG004E 0x0138 -+#define ROPLL_PI_EN BIT(5) -+#define CMN_REG004F 0x013C -+#define CMN_REG0050 0x0140 -+#define CMN_REG0051 0x0144 -+#define CMN_REG0052 0x0148 -+#define CMN_REG0053 0x014C -+#define CMN_REG0054 0x0150 -+#define CMN_REG0055 0x0154 -+#define CMN_REG0056 0x0158 -+#define CMN_REG0057 0x015C -+#define CMN_REG0058 0x0160 -+#define CMN_REG0059 0x0164 -+#define CMN_REG005A 0x0168 -+#define CMN_REG005B 0x016C -+#define CMN_REG005C 0x0170 -+#define ROPLL_PMS_IQDIV_RSTN BIT(5) -+#define CMN_REG005D 0x0174 -+#define CMN_REG005E 0x0178 -+#define ROPLL_SDM_EN_MASK BIT(6) -+#define ROPLL_SDM_EN(x) UPDATE(x, 6, 6) -+#define ROPLL_SDM_FRAC_EN_RBR BIT(3) -+#define ROPLL_SDM_FRAC_EN_HBR BIT(2) -+#define ROPLL_SDM_FRAC_EN_HBR2 BIT(1) -+#define ROPLL_SDM_FRAC_EN_HBR3 BIT(0) -+#define CMN_REG005F 0x017C -+#define CMN_REG0060 0x0180 -+#define CMN_REG0061 0x0184 -+#define CMN_REG0062 0x0188 -+#define CMN_REG0063 0x018C -+#define CMN_REG0064 0x0190 -+#define ROPLL_SDM_NUM_SIGN_RBR_MASK BIT(3) -+#define ROPLL_SDM_NUM_SIGN_RBR(x) UPDATE(x, 3, 3) -+#define CMN_REG0065 0x0194 -+#define CMN_REG0066 0x0198 -+#define CMN_REG0067 0x019C -+#define CMN_REG0068 0x01A0 -+#define CMN_REG0069 0x01A4 -+#define ROPLL_SDC_N_RBR_MASK GENMASK(2, 0) -+#define ROPLL_SDC_N_RBR(x) UPDATE(x, 2, 0) -+#define CMN_REG006A 0x01A8 -+#define CMN_REG006B 0x01AC -+#define CMN_REG006C 0x01B0 -+#define CMN_REG006D 0x01B4 -+#define CMN_REG006E 0x01B8 -+#define CMN_REG006F 0x01BC -+#define CMN_REG0070 0x01C0 -+#define CMN_REG0071 0x01C4 -+#define CMN_REG0072 0x01C8 -+#define CMN_REG0073 0x01CC -+#define CMN_REG0074 0x01D0 -+#define ROPLL_SDC_NDIV_RSTN BIT(2) -+#define ROPLL_SSC_EN BIT(0) -+#define CMN_REG0075 0x01D4 -+#define CMN_REG0076 0x01D8 -+#define CMN_REG0077 0x01DC -+#define CMN_REG0078 0x01E0 -+#define CMN_REG0079 0x01E4 -+#define CMN_REG007A 0x01E8 -+#define CMN_REG007B 0x01EC -+#define CMN_REG007C 0x01F0 -+#define CMN_REG007D 0x01F4 -+#define CMN_REG007E 0x01F8 -+#define CMN_REG007F 0x01FC -+#define CMN_REG0080 0x0200 -+#define CMN_REG0081 0x0204 -+#define OVRD_PLL_CD_CLK_EN BIT(8) -+#define PLL_CD_HSCLK_EAST_EN BIT(0) -+#define CMN_REG0082 0x0208 -+#define CMN_REG0083 0x020C -+#define CMN_REG0084 0x0210 -+#define CMN_REG0085 0x0214 -+#define CMN_REG0086 0x0218 -+#define PLL_PCG_POSTDIV_SEL_MASK GENMASK(7, 4) -+#define PLL_PCG_POSTDIV_SEL(x) UPDATE(x, 7, 4) -+#define PLL_PCG_CLK_SEL_MASK GENMASK(3, 1) -+#define PLL_PCG_CLK_SEL(x) UPDATE(x, 3, 1) -+#define PLL_PCG_CLK_EN BIT(0) -+#define CMN_REG0087 0x021C -+#define PLL_FRL_MODE_EN BIT(3) -+#define PLL_TX_HS_CLK_EN BIT(2) -+#define CMN_REG0088 0x0220 -+#define CMN_REG0089 0x0224 -+#define LCPLL_ALONE_MODE BIT(1) -+#define CMN_REG008A 0x0228 -+#define CMN_REG008B 0x022C -+#define CMN_REG008C 0x0230 -+#define CMN_REG008D 0x0234 -+#define CMN_REG008E 0x0238 -+#define CMN_REG008F 0x023C -+#define CMN_REG0090 0x0240 -+#define CMN_REG0091 0x0244 -+#define CMN_REG0092 0x0248 -+#define CMN_REG0093 0x024C -+#define CMN_REG0094 0x0250 -+#define CMN_REG0095 0x0254 -+#define CMN_REG0096 0x0258 -+#define CMN_REG0097 0x025C -+#define DIG_CLK_SEL BIT(1) -+#define ROPLL_REF BIT(1) -+#define LCPLL_REF 0 -+#define CMN_REG0098 0x0260 -+#define CMN_REG0099 0x0264 -+#define CMN_ROPLL_ALONE_MODE BIT(2) -+#define ROPLL_ALONE_MODE BIT(2) -+#define CMN_REG009A 0x0268 -+#define HS_SPEED_SEL BIT(0) -+#define DIV_10_CLOCK BIT(0) -+#define CMN_REG009B 0x026C -+#define IS_SPEED_SEL BIT(4) -+#define LINK_SYMBOL_CLOCK BIT(4) 
-+#define LINK_SYMBOL_CLOCK1_2 0 -+#define CMN_REG009C 0x0270 -+#define CMN_REG009D 0x0274 -+#define CMN_REG009E 0x0278 -+#define CMN_REG009F 0x027C -+#define CMN_REG00A0 0x0280 -+#define CMN_REG00A1 0x0284 -+#define CMN_REG00A2 0x0288 -+#define CMN_REG00A3 0x028C -+#define CMN_REG00AD 0x0290 -+#define CMN_REG00A5 0x0294 -+#define CMN_REG00A6 0x0298 -+#define CMN_REG00A7 0x029C -+#define SB_REG0100 0x0400 -+#define SB_REG0101 0x0404 -+#define SB_REG0102 0x0408 -+#define OVRD_SB_RXTERM_EN_MASK BIT(5) -+#define OVRD_SB_RXTERM_EN(x) UPDATE(x, 5, 5) -+#define SB_RXTERM_EN_MASK BIT(4) -+#define SB_RXTERM_EN(x) UPDATE(x, 4, 4) -+#define ANA_SB_RXTERM_OFFSP_MASK GENMASK(3, 0) -+#define ANA_SB_RXTERM_OFFSP(x) UPDATE(x, 3, 0) -+#define SB_REG0103 0x040C -+#define ANA_SB_RXTERM_OFFSN_MASK GENMASK(6, 3) -+#define ANA_SB_RXTERM_OFFSN(x) UPDATE(x, 6, 3) -+#define OVRD_SB_RX_RESCAL_DONE_MASK BIT(1) -+#define OVRD_SB_RX_RESCAL_DONE(x) UPDATE(x, 1, 1) -+#define SB_RX_RESCAL_DONE_MASK BIT(0) -+#define SB_RX_RESCAL_DONE(x) UPDATE(x, 0, 0) -+#define SB_REG0104 0x0410 -+#define OVRD_SB_EN_MASK BIT(5) -+#define OVRD_SB_EN(x) UPDATE(x, 5, 5) -+#define SB_EN_MASK BIT(4) -+#define SB_EN(x) UPDATE(x, 4, 4) -+#define SB_REG0105 0x0414 -+#define OVRD_SB_EARC_CMDC_EN_MASK BIT(6) -+#define OVRD_SB_EARC_CMDC_EN(x) UPDATE(x, 6, 6) -+#define SB_EARC_CMDC_EN_MASK BIT(5) -+#define SB_EARC_CMDC_EN(x) UPDATE(x, 5, 5) -+#define ANA_SB_TX_HLVL_PROG_MASK GENMASK(2, 0) -+#define ANA_SB_TX_HLVL_PROG(x) UPDATE(x, 2, 0) -+#define SB_REG0106 0x0418 -+#define ANA_SB_TX_LLVL_PROG_MASK GENMASK(6, 4) -+#define ANA_SB_TX_LLVL_PROG(x) UPDATE(x, 6, 4) -+#define SB_REG0107 0x041C -+#define SB_REG0108 0x0420 -+#define SB_REG0109 0x0424 -+#define ANA_SB_DMRX_AFC_DIV_RATIO_MASK GENMASK(2, 0) -+#define ANA_SB_DMRX_AFC_DIV_RATIO(x) UPDATE(x, 2, 0) -+#define SB_REG010A 0x0428 -+#define SB_REG010B 0x042C -+#define SB_REG010C 0x0430 -+#define SB_REG010D 0x0434 -+#define SB_REG010E 0x0438 -+#define SB_REG010F 0x043C -+#define OVRD_SB_VREG_EN_MASK BIT(7) -+#define OVRD_SB_VREG_EN(x) UPDATE(x, 7, 7) -+#define SB_VREG_EN_MASK BIT(6) -+#define SB_VREG_EN(x) UPDATE(x, 6, 6) -+#define OVRD_SB_VREG_LPF_BYPASS_MASK BIT(5) -+#define OVRD_SB_VREG_LPF_BYPASS(x) UPDATE(x, 5, 5) -+#define SB_VREG_LPF_BYPASS_MASK BIT(4) -+#define SB_VREG_LPF_BYPASS(x) UPDATE(x, 4, 4) -+#define ANA_SB_VREG_GAIN_CTRL_MASK GENMASK(3, 0) -+#define ANA_SB_VREG_GAIN_CTRL(x) UPDATE(x, 3, 0) -+#define SB_REG0110 0x0440 -+#define ANA_SB_VREG_REF_SEL_MASK BIT(0) -+#define ANA_SB_VREG_REF_SEL(x) UPDATE(x, 0, 0) -+#define SB_REG0111 0x0444 -+#define SB_REG0112 0x0448 -+#define SB_REG0113 0x044C -+#define SB_RX_RCAL_OPT_CODE_MASK GENMASK(5, 4) -+#define SB_RX_RCAL_OPT_CODE(x) UPDATE(x, 5, 4) -+#define SB_RX_RTERM_CTRL_MASK GENMASK(3, 0) -+#define SB_RX_RTERM_CTRL(x) UPDATE(x, 3, 0) -+#define SB_REG0114 0x0450 -+#define SB_TG_SB_EN_DELAY_TIME_MASK GENMASK(5, 3) -+#define SB_TG_SB_EN_DELAY_TIME(x) UPDATE(x, 5, 3) -+#define SB_TG_RXTERM_EN_DELAY_TIME_MASK GENMASK(2, 0) -+#define SB_TG_RXTERM_EN_DELAY_TIME(x) UPDATE(x, 2, 0) -+#define SB_REG0115 0x0454 -+#define SB_READY_DELAY_TIME_MASK GENMASK(5, 3) -+#define SB_READY_DELAY_TIME(x) UPDATE(x, 5, 3) -+#define SB_TG_OSC_EN_DELAY_TIME_MASK GENMASK(2, 0) -+#define SB_TG_OSC_EN_DELAY_TIME(x) UPDATE(x, 2, 0) -+#define SB_REG0116 0x0458 -+#define AFC_RSTN_DELAY_TIME_MASK GENMASK(6, 4) -+#define AFC_RSTN_DELAY_TIME(x) UPDATE(x, 6, 4) -+#define SB_REG0117 0x045C -+#define FAST_PULSE_TIME_MASK GENMASK(3, 0) -+#define FAST_PULSE_TIME(x) UPDATE(x, 3, 0) 
-+#define SB_REG0118 0x0460 -+#define SB_REG0119 0x0464 -+#define SB_REG011A 0x0468 -+#define SB_REG011B 0x046C -+#define SB_EARC_SIG_DET_BYPASS_MASK BIT(4) -+#define SB_EARC_SIG_DET_BYPASS(x) UPDATE(x, 4, 4) -+#define SB_AFC_TOL_MASK GENMASK(3, 0) -+#define SB_AFC_TOL(x) UPDATE(x, 3, 0) -+#define SB_REG011C 0x0470 -+#define SB_REG011D 0x0474 -+#define SB_REG011E 0x0478 -+#define SB_REG011F 0x047C -+#define SB_PWM_AFC_CTRL_MASK GENMASK(7, 2) -+#define SB_PWM_AFC_CTRL(x) UPDATE(x, 7, 2) -+#define SB_RCAL_RSTN_MASK BIT(1) -+#define SB_RCAL_RSTN(x) UPDATE(x, 1, 1) -+#define SB_REG0120 0x0480 -+#define SB_EARC_EN_MASK BIT(1) -+#define SB_EARC_EN(x) UPDATE(x, 1, 1) -+#define SB_EARC_AFC_EN_MASK BIT(2) -+#define SB_EARC_AFC_EN(x) UPDATE(x, 2, 2) -+#define SB_REG0121 0x0484 -+#define SB_REG0122 0x0488 -+#define SB_REG0123 0x048C -+#define OVRD_SB_READY_MASK BIT(5) -+#define OVRD_SB_READY(x) UPDATE(x, 5, 5) -+#define SB_READY_MASK BIT(4) -+#define SB_READY(x) UPDATE(x, 4, 4) -+#define SB_REG0124 0x0490 -+#define SB_REG0125 0x0494 -+#define SB_REG0126 0x0498 -+#define SB_REG0127 0x049C -+#define SB_REG0128 0x04A0 -+#define SB_REG0129 0x04AD -+#define LNTOP_REG0200 0x0800 -+#define PROTOCOL_SEL BIT(2) -+#define HDMI_MODE BIT(2) -+#define HDMI_TMDS_FRL_SEL BIT(1) -+#define LNTOP_REG0201 0x0804 -+#define LNTOP_REG0202 0x0808 -+#define LNTOP_REG0203 0x080C -+#define LNTOP_REG0204 0x0810 -+#define LNTOP_REG0205 0x0814 -+#define LNTOP_REG0206 0x0818 -+#define DATA_BUS_WIDTH (0x3 << 1) -+#define WIDTH_40BIT (0x3 << 1) -+#define WIDTH_36BIT (0x2 << 1) -+#define DATA_BUS_SEL BIT(0) -+#define DATA_BUS_36_40 BIT(0) -+#define LNTOP_REG0207 0x081C -+#define LANE_EN 0xf -+#define ALL_LANE_EN 0xf -+#define LNTOP_REG0208 0x0820 -+#define LNTOP_REG0209 0x0824 -+#define LNTOP_REG020A 0x0828 -+#define LNTOP_REG020B 0x082C -+#define LNTOP_REG020C 0x0830 -+#define LNTOP_REG020D 0x0834 -+#define LNTOP_REG020E 0x0838 -+#define LNTOP_REG020F 0x083C -+#define LNTOP_REG0210 0x0840 -+#define LNTOP_REG0211 0x0844 -+#define LNTOP_REG0212 0x0848 -+#define LNTOP_REG0213 0x084C -+#define LNTOP_REG0214 0x0850 -+#define LNTOP_REG0215 0x0854 -+#define LNTOP_REG0216 0x0858 -+#define LNTOP_REG0217 0x085C -+#define LNTOP_REG0218 0x0860 -+#define LNTOP_REG0219 0x0864 -+#define LNTOP_REG021A 0x0868 -+#define LNTOP_REG021B 0x086C -+#define LNTOP_REG021C 0x0870 -+#define LNTOP_REG021D 0x0874 -+#define LNTOP_REG021E 0x0878 -+#define LNTOP_REG021F 0x087C -+#define LNTOP_REG0220 0x0880 -+#define LNTOP_REG0221 0x0884 -+#define LNTOP_REG0222 0x0888 -+#define LNTOP_REG0223 0x088C -+#define LNTOP_REG0224 0x0890 -+#define LNTOP_REG0225 0x0894 -+#define LNTOP_REG0226 0x0898 -+#define LNTOP_REG0227 0x089C -+#define LNTOP_REG0228 0x08A0 -+#define LNTOP_REG0229 0x08A4 -+#define LANE_REG0300 0x0C00 -+#define LANE_REG0301 0x0C04 -+#define LANE_REG0302 0x0C08 -+#define LANE_REG0303 0x0C0C -+#define LANE_REG0304 0x0C10 -+#define LANE_REG0305 0x0C14 -+#define LANE_REG0306 0x0C18 -+#define LANE_REG0307 0x0C1C -+#define LANE_REG0308 0x0C20 -+#define LANE_REG0309 0x0C24 -+#define LANE_REG030A 0x0C28 -+#define LANE_REG030B 0x0C2C -+#define LANE_REG030C 0x0C30 -+#define LANE_REG030D 0x0C34 -+#define LANE_REG030E 0x0C38 -+#define LANE_REG030F 0x0C3C -+#define LANE_REG0310 0x0C40 -+#define LANE_REG0311 0x0C44 -+#define LANE_REG0312 0x0C48 -+#define LN0_TX_SER_RATE_SEL_RBR BIT(5) -+#define LN0_TX_SER_RATE_SEL_HBR BIT(4) -+#define LN0_TX_SER_RATE_SEL_HBR2 BIT(3) -+#define LN0_TX_SER_RATE_SEL_HBR3 BIT(2) -+#define LANE_REG0313 0x0C4C -+#define LANE_REG0314 0x0C50 
-+#define LANE_REG0315 0x0C54 -+#define LANE_REG0316 0x0C58 -+#define LANE_REG0317 0x0C5C -+#define LANE_REG0318 0x0C60 -+#define LANE_REG0319 0x0C64 -+#define LANE_REG031A 0x0C68 -+#define LANE_REG031B 0x0C6C -+#define LANE_REG031C 0x0C70 -+#define LANE_REG031D 0x0C74 -+#define LANE_REG031E 0x0C78 -+#define LANE_REG031F 0x0C7C -+#define LANE_REG0320 0x0C80 -+#define LANE_REG0321 0x0C84 -+#define LANE_REG0322 0x0C88 -+#define LANE_REG0323 0x0C8C -+#define LANE_REG0324 0x0C90 -+#define LANE_REG0325 0x0C94 -+#define LANE_REG0326 0x0C98 -+#define LANE_REG0327 0x0C9C -+#define LANE_REG0328 0x0CA0 -+#define LANE_REG0329 0x0CA4 -+#define LANE_REG032A 0x0CA8 -+#define LANE_REG032B 0x0CAC -+#define LANE_REG032C 0x0CB0 -+#define LANE_REG032D 0x0CB4 -+#define LANE_REG0400 0x1000 -+#define LANE_REG0401 0x1004 -+#define LANE_REG0402 0x1008 -+#define LANE_REG0403 0x100C -+#define LANE_REG0404 0x1010 -+#define LANE_REG0405 0x1014 -+#define LANE_REG0406 0x1018 -+#define LANE_REG0407 0x101C -+#define LANE_REG0408 0x1020 -+#define LANE_REG0409 0x1024 -+#define LANE_REG040A 0x1028 -+#define LANE_REG040B 0x102C -+#define LANE_REG040C 0x1030 -+#define LANE_REG040D 0x1034 -+#define LANE_REG040E 0x1038 -+#define LANE_REG040F 0x103C -+#define LANE_REG0410 0x1040 -+#define LANE_REG0411 0x1044 -+#define LANE_REG0412 0x1048 -+#define LN1_TX_SER_RATE_SEL_RBR BIT(5) -+#define LN1_TX_SER_RATE_SEL_HBR BIT(4) -+#define LN1_TX_SER_RATE_SEL_HBR2 BIT(3) -+#define LN1_TX_SER_RATE_SEL_HBR3 BIT(2) -+#define LANE_REG0413 0x104C -+#define LANE_REG0414 0x1050 -+#define LANE_REG0415 0x1054 -+#define LANE_REG0416 0x1058 -+#define LANE_REG0417 0x105C -+#define LANE_REG0418 0x1060 -+#define LANE_REG0419 0x1064 -+#define LANE_REG041A 0x1068 -+#define LANE_REG041B 0x106C -+#define LANE_REG041C 0x1070 -+#define LANE_REG041D 0x1074 -+#define LANE_REG041E 0x1078 -+#define LANE_REG041F 0x107C -+#define LANE_REG0420 0x1080 -+#define LANE_REG0421 0x1084 -+#define LANE_REG0422 0x1088 -+#define LANE_REG0423 0x108C -+#define LANE_REG0424 0x1090 -+#define LANE_REG0425 0x1094 -+#define LANE_REG0426 0x1098 -+#define LANE_REG0427 0x109C -+#define LANE_REG0428 0x10A0 -+#define LANE_REG0429 0x10A4 -+#define LANE_REG042A 0x10A8 -+#define LANE_REG042B 0x10AC -+#define LANE_REG042C 0x10B0 -+#define LANE_REG042D 0x10B4 -+#define LANE_REG0500 0x1400 -+#define LANE_REG0501 0x1404 -+#define LANE_REG0502 0x1408 -+#define LANE_REG0503 0x140C -+#define LANE_REG0504 0x1410 -+#define LANE_REG0505 0x1414 -+#define LANE_REG0506 0x1418 -+#define LANE_REG0507 0x141C -+#define LANE_REG0508 0x1420 -+#define LANE_REG0509 0x1424 -+#define LANE_REG050A 0x1428 -+#define LANE_REG050B 0x142C -+#define LANE_REG050C 0x1430 -+#define LANE_REG050D 0x1434 -+#define LANE_REG050E 0x1438 -+#define LANE_REG050F 0x143C -+#define LANE_REG0510 0x1440 -+#define LANE_REG0511 0x1444 -+#define LANE_REG0512 0x1448 -+#define LN2_TX_SER_RATE_SEL_RBR BIT(5) -+#define LN2_TX_SER_RATE_SEL_HBR BIT(4) -+#define LN2_TX_SER_RATE_SEL_HBR2 BIT(3) -+#define LN2_TX_SER_RATE_SEL_HBR3 BIT(2) -+#define LANE_REG0513 0x144C -+#define LANE_REG0514 0x1450 -+#define LANE_REG0515 0x1454 -+#define LANE_REG0516 0x1458 -+#define LANE_REG0517 0x145C -+#define LANE_REG0518 0x1460 -+#define LANE_REG0519 0x1464 -+#define LANE_REG051A 0x1468 -+#define LANE_REG051B 0x146C -+#define LANE_REG051C 0x1470 -+#define LANE_REG051D 0x1474 -+#define LANE_REG051E 0x1478 -+#define LANE_REG051F 0x147C -+#define LANE_REG0520 0x1480 -+#define LANE_REG0521 0x1484 -+#define LANE_REG0522 0x1488 -+#define LANE_REG0523 0x148C -+#define 
LANE_REG0524 0x1490 -+#define LANE_REG0525 0x1494 -+#define LANE_REG0526 0x1498 -+#define LANE_REG0527 0x149C -+#define LANE_REG0528 0x14A0 -+#define LANE_REG0529 0x14AD -+#define LANE_REG052A 0x14A8 -+#define LANE_REG052B 0x14AC -+#define LANE_REG052C 0x14B0 -+#define LANE_REG052D 0x14B4 -+#define LANE_REG0600 0x1800 -+#define LANE_REG0601 0x1804 -+#define LANE_REG0602 0x1808 -+#define LANE_REG0603 0x180C -+#define LANE_REG0604 0x1810 -+#define LANE_REG0605 0x1814 -+#define LANE_REG0606 0x1818 -+#define LANE_REG0607 0x181C -+#define LANE_REG0608 0x1820 -+#define LANE_REG0609 0x1824 -+#define LANE_REG060A 0x1828 -+#define LANE_REG060B 0x182C -+#define LANE_REG060C 0x1830 -+#define LANE_REG060D 0x1834 -+#define LANE_REG060E 0x1838 -+#define LANE_REG060F 0x183C -+#define LANE_REG0610 0x1840 -+#define LANE_REG0611 0x1844 -+#define LANE_REG0612 0x1848 -+#define LN3_TX_SER_RATE_SEL_RBR BIT(5) -+#define LN3_TX_SER_RATE_SEL_HBR BIT(4) -+#define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) -+#define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) -+#define LANE_REG0613 0x184C -+#define LANE_REG0614 0x1850 -+#define LANE_REG0615 0x1854 -+#define LANE_REG0616 0x1858 -+#define LANE_REG0617 0x185C -+#define LANE_REG0618 0x1860 -+#define LANE_REG0619 0x1864 -+#define LANE_REG061A 0x1868 -+#define LANE_REG061B 0x186C -+#define LANE_REG061C 0x1870 -+#define LANE_REG061D 0x1874 -+#define LANE_REG061E 0x1878 -+#define LANE_REG061F 0x187C -+#define LANE_REG0620 0x1880 -+#define LANE_REG0621 0x1884 -+#define LANE_REG0622 0x1888 -+#define LANE_REG0623 0x188C -+#define LANE_REG0624 0x1890 -+#define LANE_REG0625 0x1894 -+#define LANE_REG0626 0x1898 -+#define LANE_REG0627 0x189C -+#define LANE_REG0628 0x18A0 -+#define LANE_REG0629 0x18A4 -+#define LANE_REG062A 0x18A8 -+#define LANE_REG062B 0x18AC -+#define LANE_REG062C 0x18B0 -+#define LANE_REG062D 0x18B4 -+ -+#define HDMI20_MAX_RATE 600000000 -+#define DATA_RATE_MASK 0xFFFFFFF -+#define COLOR_DEPTH_MASK BIT(31) -+#define HDMI_MODE_MASK BIT(30) -+#define HDMI_EARC_MASK BIT(29) -+ -+struct lcpll_config { -+ u32 bit_rate; -+ u8 lcvco_mode_en; -+ u8 pi_en; -+ u8 clk_en_100m; -+ u8 pms_mdiv; -+ u8 pms_mdiv_afc; -+ u8 pms_pdiv; -+ u8 pms_refdiv; -+ u8 pms_sdiv; -+ u8 pi_cdiv_rstn; -+ u8 pi_cdiv_sel; -+ u8 sdm_en; -+ u8 sdm_rstn; -+ u8 sdc_frac_en; -+ u8 sdc_rstn; -+ u8 sdm_deno; -+ u8 sdm_num_sign; -+ u8 sdm_num; -+ u8 sdc_n; -+ u8 sdc_n2; -+ u8 sdc_num; -+ u8 sdc_deno; -+ u8 sdc_ndiv_rstn; -+ u8 ssc_en; -+ u8 ssc_fm_dev; -+ u8 ssc_fm_freq; -+ u8 ssc_clk_div_sel; -+ u8 cd_tx_ser_rate_sel; -+}; -+ -+struct ropll_config { -+ u32 bit_rate; -+ u8 pms_mdiv; -+ u8 pms_mdiv_afc; -+ u8 pms_pdiv; -+ u8 pms_refdiv; -+ u8 pms_sdiv; -+ u8 pms_iqdiv_rstn; -+ u8 ref_clk_sel; -+ u8 sdm_en; -+ u8 sdm_rstn; -+ u8 sdc_frac_en; -+ u8 sdc_rstn; -+ u8 sdm_clk_div; -+ u8 sdm_deno; -+ u8 sdm_num_sign; -+ u8 sdm_num; -+ u8 sdc_n; -+ u8 sdc_num; -+ u8 sdc_deno; -+ u8 sdc_ndiv_rstn; -+ u8 ssc_en; -+ u8 ssc_fm_dev; -+ u8 ssc_fm_freq; -+ u8 ssc_clk_div_sel; -+ u8 ana_cpp_ctrl; -+ u8 ana_lpf_c_sel; -+ u8 cd_tx_ser_rate_sel; -+}; -+ -+enum rockchip_hdptx_reset { -+ RST_PHY = 0, -+ RST_APB, -+ RST_INIT, -+ RST_CMN, -+ RST_LANE, -+ RST_ROPLL, -+ RST_LCPLL, -+ RST_MAX -+}; -+ -+struct rockchip_hdptx_phy { -+ struct device *dev; -+ struct regmap *regmap; -+ struct regmap *grf; -+ -+ struct phy *phy; -+ struct phy_config *phy_cfg; -+ struct clk_bulk_data *clks; -+ int nr_clks; -+ struct reset_control_bulk_data rsts[RST_MAX]; -+ bool earc_en; -+}; -+ -+static const struct lcpll_config lcpll_cfg[] = { -+ { 48000000, 1, 0, 0, 0x7d, 
0x7d, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, -+ 0, 0x13, 0x18, 1, 0, 0x20, 0x0c, 1, 0, }, -+ { 40000000, 1, 1, 0, 0x68, 0x68, 1, 1, 0, 0, 0, 1, 1, 1, 1, 9, 0, 1, 1, -+ 0, 2, 3, 1, 0, 0x20, 0x0c, 1, 0, }, -+ { 32000000, 1, 1, 1, 0x6b, 0x6b, 1, 1, 0, 1, 2, 1, 1, 1, 1, 9, 1, 2, 1, -+ 0, 0x0d, 0x18, 1, 0, 0x20, 0x0c, 1, 1, }, -+ { ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, 0, }, -+}; -+ -+static const struct ropll_config ropll_frl_cfg[] = { -+ { 24000000, 0x19, 0x19, 1, 1, 0, 1, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, -+ 0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 18000000, 0x7d, 0x7d, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, -+ 0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 9000000, 0x7d, 0x7d, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, -+ 0, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, }, -+}; -+ -+static const struct ropll_config ropll_tmds_cfg[] = { -+ { 5940000, 124, 124, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 3712500, 155, 155, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 2970000, 124, 124, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1620000, 135, 135, 1, 1, 3, 1, 1, 0, 1, 1, 1, 1, 4, 0, 3, 5, 5, 0x10, -+ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1856250, 155, 155, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1540000, 193, 193, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 193, 1, 32, 2, 1, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1485000, 0x7b, 0x7b, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 4, 0, 3, 5, 5, -+ 0x10, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1462500, 122, 122, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 244, 1, 16, 2, 1, 1, -+ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1190000, 149, 149, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 149, 1, 16, 2, 1, 1, -+ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1065000, 89, 89, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 89, 1, 16, 1, 0, 1, -+ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 1080000, 135, 135, 1, 1, 5, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, -+ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 855000, 214, 214, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 214, 1, 16, 2, 1, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 835000, 105, 105, 1, 1, 5, 1, 1, 1, 1, 1, 1, 1, 42, 1, 16, 1, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 928125, 155, 155, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 742500, 124, 124, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 62, 1, 16, 5, 0, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 650000, 162, 162, 1, 1, 11, 1, 1, 1, 1, 1, 1, 1, 54, 0, 16, 4, 1, -+ 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 337500, 0x70, 0x70, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 0x2, 0, 0x01, 5, -+ 1, 1, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 400000, 100, 100, 1, 1, 11, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, -+ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 270000, 0x5a, 0x5a, 1, 1, 0xf, 1, 1, 0, 1, 0, 1, 1, 0x9, 0, 0x05, 0, -+ 0x14, 0x18, 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { 251750, 84, 84, 1, 1, 0xf, 1, 1, 1, 1, 1, 1, 1, 168, 1, 16, 4, 1, 1, -+ 1, 0, 0x20, 0x0c, 1, 0x0e, 0, 0, }, -+ { ~0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -+ 0, 0, 0, 0, }, -+}; -+ -+static bool rockchip_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) -+{ -+ switch (reg) { -+ case 0x0000 ... 0x029c: -+ case 0x0400 ... 
0x04a4: -+ case 0x0800 ... 0x08a4: -+ case 0x0c00 ... 0x0cb4: -+ case 0x1000 ... 0x10b4: -+ case 0x1400 ... 0x14b4: -+ case 0x1800 ... 0x18b4: -+ return true; -+ } -+ -+ return false; -+} -+ -+static const struct regmap_config rockchip_hdptx_phy_regmap_config = { -+ .name = "hdptx-combphy", -+ .reg_bits = 32, -+ .reg_stride = 4, -+ .val_bits = 32, -+ .fast_io = true, -+ .writeable_reg = rockchip_hdptx_phy_is_rw_reg, -+ .readable_reg = rockchip_hdptx_phy_is_rw_reg, -+ .max_register = 0x18b4, -+}; -+ -+static int hdptx_write(struct rockchip_hdptx_phy *hdptx, u32 reg, u8 val) -+{ -+ return regmap_write(hdptx->regmap, reg, val); -+} -+ -+static int hdptx_update_bits(struct rockchip_hdptx_phy *hdptx, u32 reg, -+ u8 mask, u8 val) -+{ -+ return regmap_update_bits(hdptx->regmap, reg, mask, val); -+} -+ -+static int hdptx_grf_write(struct rockchip_hdptx_phy *hdptx, u32 reg, u32 val) -+{ -+ return regmap_write(hdptx->grf, reg, val); -+} -+ -+static u8 hdptx_grf_read(struct rockchip_hdptx_phy *hdptx, u32 reg) -+{ -+ u32 val; -+ -+ regmap_read(hdptx->grf, reg, &val); -+ -+ return val; -+} -+ -+static void hdptx_pre_power_up(struct rockchip_hdptx_phy *hdptx) -+{ -+ u32 val = 0; -+ -+ reset_control_assert(hdptx->rsts[RST_APB].rstc); -+ udelay(20); -+ reset_control_deassert(hdptx->rsts[RST_APB].rstc); -+ -+ reset_control_assert(hdptx->rsts[RST_LANE].rstc); -+ reset_control_assert(hdptx->rsts[RST_CMN].rstc); -+ reset_control_assert(hdptx->rsts[RST_INIT].rstc); -+ -+ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+} -+ -+static int hdptx_post_enable_lane(struct rockchip_hdptx_phy *hdptx) -+{ -+ u32 val; -+ int ret; -+ -+ reset_control_deassert(hdptx->rsts[RST_LANE].rstc); -+ -+ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | -+ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+ -+ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, -+ (val & HDPTX_O_PHY_RDY) && -+ (val & HDPTX_O_PLL_LOCK_DONE), -+ 100, 5000); -+ if (ret) { -+ dev_err(hdptx->dev, "Failed to get PHY lane lock: %d\n", ret); -+ return ret; -+ } -+ -+ dev_dbg(hdptx->dev, "PHY lane locked\n"); -+ -+ return 0; -+} -+ -+static int hdptx_post_enable_pll(struct rockchip_hdptx_phy *hdptx) -+{ -+ u32 val; -+ int ret; -+ -+ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | -+ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+ udelay(10); -+ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); -+ udelay(10); -+ val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+ udelay(10); -+ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); -+ -+ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, -+ val & HDPTX_O_PHY_CLK_RDY, 20, 400); -+ if (ret) { -+ dev_err(hdptx->dev, "Failed to get PHY clk ready: %d\n", ret); -+ return ret; -+ } -+ -+ dev_dbg(hdptx->dev, "PHY clk ready\n"); -+ -+ return 0; -+} -+ -+static int hdptx_post_power_up(struct rockchip_hdptx_phy *hdptx) -+{ -+ u32 val = 0; -+ int ret; -+ -+ val = (HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16 | -+ HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+ udelay(10); -+ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); -+ udelay(10); -+ val = HDPTX_I_PLL_EN << 16 | HDPTX_I_PLL_EN; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+ udelay(10); -+ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); -+ -+ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, -+ val & 
HDPTX_O_PLL_LOCK_DONE, 20, 400); -+ if (ret) { -+ dev_err(hdptx->dev, "Failed to get PHY PLL lock: %d\n", ret); -+ return ret; -+ } -+ -+ udelay(20); -+ reset_control_deassert(hdptx->rsts[RST_LANE].rstc); -+ -+ ret = regmap_read_poll_timeout(hdptx->grf, GRF_HDPTX_STATUS, val, -+ val & HDPTX_O_PHY_RDY, 100, 5000); -+ if (ret) { -+ dev_err(hdptx->dev, "Failed to get PHY ready: %d\n", ret); -+ return ret; -+ } -+ -+ dev_dbg(hdptx->dev, "PHY ready\n"); -+ -+ return 0; -+} -+ -+static void hdptx_phy_disable(struct rockchip_hdptx_phy *hdptx) -+{ -+ u32 val; -+ -+ /* reset phy and apb, or phy locked flag may keep 1 */ -+ reset_control_assert(hdptx->rsts[RST_PHY].rstc); -+ udelay(20); -+ reset_control_deassert(hdptx->rsts[RST_PHY].rstc); -+ -+ reset_control_assert(hdptx->rsts[RST_APB].rstc); -+ udelay(20); -+ reset_control_deassert(hdptx->rsts[RST_APB].rstc); -+ -+ hdptx_write(hdptx, LANE_REG0300, 0x82); -+ hdptx_write(hdptx, SB_REG010F, 0xc1); -+ hdptx_write(hdptx, SB_REG0110, 0x1); -+ hdptx_write(hdptx, LANE_REG0301, 0x80); -+ hdptx_write(hdptx, LANE_REG0401, 0x80); -+ hdptx_write(hdptx, LANE_REG0501, 0x80); -+ hdptx_write(hdptx, LANE_REG0601, 0x80); -+ -+ reset_control_assert(hdptx->rsts[RST_LANE].rstc); -+ reset_control_assert(hdptx->rsts[RST_CMN].rstc); -+ reset_control_assert(hdptx->rsts[RST_INIT].rstc); -+ -+ val = (HDPTX_I_PLL_EN | HDPTX_I_BIAS_EN | HDPTX_I_BGR_EN) << 16; -+ hdptx_grf_write(hdptx, GRF_HDPTX_CON0, val); -+} -+ -+static void hdptx_earc_config(struct rockchip_hdptx_phy *hdptx) -+{ -+ hdptx_update_bits(hdptx, SB_REG0113, SB_RX_RCAL_OPT_CODE_MASK, -+ SB_RX_RCAL_OPT_CODE(1)); -+ hdptx_write(hdptx, SB_REG011C, 0x04); -+ hdptx_update_bits(hdptx, SB_REG011B, SB_AFC_TOL_MASK, -+ SB_AFC_TOL(3)); -+ hdptx_write(hdptx, SB_REG0109, 0x05); -+ hdptx_update_bits(hdptx, SB_REG0120, SB_EARC_EN_MASK | SB_EARC_AFC_EN_MASK, -+ SB_EARC_EN(1) | SB_EARC_AFC_EN(1)); -+ hdptx_update_bits(hdptx, SB_REG011B, SB_EARC_SIG_DET_BYPASS_MASK, -+ SB_EARC_SIG_DET_BYPASS(1)); -+ hdptx_update_bits(hdptx, SB_REG011F, SB_PWM_AFC_CTRL_MASK | SB_RCAL_RSTN_MASK, -+ SB_PWM_AFC_CTRL(0xc) | SB_RCAL_RSTN(1)); -+ hdptx_update_bits(hdptx, SB_REG0115, SB_READY_DELAY_TIME_MASK, -+ SB_READY_DELAY_TIME(2)); -+ hdptx_update_bits(hdptx, SB_REG0113, SB_RX_RTERM_CTRL_MASK, -+ SB_RX_RTERM_CTRL(3)); -+ hdptx_update_bits(hdptx, SB_REG0102, ANA_SB_RXTERM_OFFSP_MASK, -+ ANA_SB_RXTERM_OFFSP(3)); -+ hdptx_update_bits(hdptx, SB_REG0103, ANA_SB_RXTERM_OFFSN_MASK, -+ ANA_SB_RXTERM_OFFSN(3)); -+ hdptx_write(hdptx, SB_REG011A, 0x03); -+ hdptx_write(hdptx, SB_REG0118, 0x0a); -+ hdptx_write(hdptx, SB_REG011E, 0x6a); -+ hdptx_write(hdptx, SB_REG011D, 0x67); -+ hdptx_update_bits(hdptx, SB_REG0117, FAST_PULSE_TIME_MASK, -+ FAST_PULSE_TIME(4)); -+ hdptx_update_bits(hdptx, SB_REG0114, SB_TG_SB_EN_DELAY_TIME_MASK | -+ SB_TG_RXTERM_EN_DELAY_TIME_MASK, -+ SB_TG_SB_EN_DELAY_TIME(2) | -+ SB_TG_RXTERM_EN_DELAY_TIME(2)); -+ hdptx_update_bits(hdptx, SB_REG0105, ANA_SB_TX_HLVL_PROG_MASK, -+ ANA_SB_TX_HLVL_PROG(7)); -+ hdptx_update_bits(hdptx, SB_REG0106, ANA_SB_TX_LLVL_PROG_MASK, -+ ANA_SB_TX_LLVL_PROG(7)); -+ hdptx_update_bits(hdptx, SB_REG010F, ANA_SB_VREG_GAIN_CTRL_MASK, -+ ANA_SB_VREG_GAIN_CTRL(0)); -+ hdptx_update_bits(hdptx, SB_REG0110, ANA_SB_VREG_REF_SEL_MASK, -+ ANA_SB_VREG_REF_SEL(1)); -+ hdptx_update_bits(hdptx, SB_REG0115, SB_TG_OSC_EN_DELAY_TIME_MASK, -+ SB_TG_OSC_EN_DELAY_TIME(2)); -+ hdptx_update_bits(hdptx, SB_REG0116, AFC_RSTN_DELAY_TIME_MASK, -+ AFC_RSTN_DELAY_TIME(2)); -+ hdptx_update_bits(hdptx, SB_REG0109, 
ANA_SB_DMRX_AFC_DIV_RATIO_MASK, -+ ANA_SB_DMRX_AFC_DIV_RATIO(5)); -+ hdptx_update_bits(hdptx, SB_REG0103, OVRD_SB_RX_RESCAL_DONE_MASK, -+ OVRD_SB_RX_RESCAL_DONE(1)); -+ hdptx_update_bits(hdptx, SB_REG0104, OVRD_SB_EN_MASK, -+ OVRD_SB_EN(1)); -+ hdptx_update_bits(hdptx, SB_REG0102, OVRD_SB_RXTERM_EN_MASK, -+ OVRD_SB_RXTERM_EN(1)); -+ hdptx_update_bits(hdptx, SB_REG0105, OVRD_SB_EARC_CMDC_EN_MASK, -+ OVRD_SB_EARC_CMDC_EN(1)); -+ hdptx_update_bits(hdptx, SB_REG010F, OVRD_SB_VREG_EN_MASK | -+ OVRD_SB_VREG_LPF_BYPASS_MASK, -+ OVRD_SB_VREG_EN(1) | OVRD_SB_VREG_LPF_BYPASS(1)); -+ hdptx_update_bits(hdptx, SB_REG0123, OVRD_SB_READY_MASK, -+ OVRD_SB_READY(1)); -+ udelay(1000); -+ hdptx_update_bits(hdptx, SB_REG0103, SB_RX_RESCAL_DONE_MASK, -+ SB_RX_RESCAL_DONE(1)); -+ udelay(50); -+ hdptx_update_bits(hdptx, SB_REG0104, SB_EN_MASK, SB_EN(1)); -+ udelay(50); -+ hdptx_update_bits(hdptx, SB_REG0102, SB_RXTERM_EN_MASK, -+ SB_RXTERM_EN(1)); -+ udelay(50); -+ hdptx_update_bits(hdptx, SB_REG0105, SB_EARC_CMDC_EN_MASK, -+ SB_EARC_CMDC_EN(1)); -+ hdptx_update_bits(hdptx, SB_REG010F, SB_VREG_EN_MASK, -+ SB_VREG_EN(1)); -+ udelay(50); -+ hdptx_update_bits(hdptx, SB_REG010F, OVRD_SB_VREG_LPF_BYPASS_MASK, -+ OVRD_SB_VREG_LPF_BYPASS(1)); -+ udelay(250); -+ hdptx_update_bits(hdptx, SB_REG010F, OVRD_SB_VREG_LPF_BYPASS_MASK, -+ OVRD_SB_VREG_LPF_BYPASS(0)); -+ udelay(100); -+ hdptx_update_bits(hdptx, SB_REG0123, SB_READY_MASK, SB_READY(1)); -+} -+ -+static bool hdptx_phy_clk_pll_calc(unsigned int data_rate, -+ struct ropll_config *cfg) -+{ -+ unsigned int fref = 24000; -+ unsigned int sdc; -+ unsigned int fout = data_rate / 2; -+ unsigned int fvco; -+ u32 mdiv, sdiv, n = 8; -+ unsigned long k = 0, lc, k_sub, lc_sub; -+ -+ for (sdiv = 16; sdiv >= 1; sdiv--) { -+ if (sdiv % 2 && sdiv != 1) -+ continue; -+ -+ fvco = fout * sdiv; -+ -+ if (fvco < 2000000 || fvco > 4000000) -+ continue; -+ -+ mdiv = DIV_ROUND_UP(fvco, fref); -+ if (mdiv < 20 || mdiv > 255) -+ continue; -+ -+ if (fref * mdiv - fvco) { -+ for (sdc = 264000; sdc <= 750000; sdc += fref) -+ if (sdc * n > fref * mdiv) -+ break; -+ -+ if (sdc > 750000) -+ continue; -+ -+ rational_best_approximation(fref * mdiv - fvco, -+ sdc / 16, -+ GENMASK(6, 0), -+ GENMASK(7, 0), -+ &k, &lc); -+ -+ rational_best_approximation(sdc * n - fref * mdiv, -+ sdc, -+ GENMASK(6, 0), -+ GENMASK(7, 0), -+ &k_sub, &lc_sub); -+ } -+ -+ break; -+ } -+ -+ if (sdiv < 1) -+ return false; -+ -+ if (cfg) { -+ cfg->pms_mdiv = mdiv; -+ cfg->pms_mdiv_afc = mdiv; -+ cfg->pms_pdiv = 1; -+ cfg->pms_refdiv = 1; -+ cfg->pms_sdiv = sdiv - 1; -+ -+ cfg->sdm_en = k > 0 ? 1 : 0; -+ if (cfg->sdm_en) { -+ cfg->sdm_deno = lc; -+ cfg->sdm_num_sign = 1; -+ cfg->sdm_num = k; -+ cfg->sdc_n = n - 3; -+ cfg->sdc_num = k_sub; -+ cfg->sdc_deno = lc_sub; -+ } -+ } -+ -+ return true; -+} -+ -+static int hdptx_ropll_cmn_config(struct rockchip_hdptx_phy *hdptx, unsigned long bit_rate) -+{ -+ int bus_width = phy_get_bus_width(hdptx->phy); -+ u8 color_depth = (bus_width & COLOR_DEPTH_MASK) ? 
1 : 0; -+ const struct ropll_config *cfg = ropll_tmds_cfg; -+ struct ropll_config rc = {0}; -+ -+ dev_dbg(hdptx->dev, "%s bus_width:%x rate:%lu\n", __func__, bus_width, bit_rate); -+ -+ if (color_depth) -+ bit_rate = bit_rate * 10 / 8; -+ -+ for (; cfg->bit_rate != ~0; cfg++) -+ if (bit_rate == cfg->bit_rate) -+ break; -+ -+ if (cfg->bit_rate == ~0) { -+ if (hdptx_phy_clk_pll_calc(bit_rate, &rc)) { -+ cfg = &rc; -+ } else { -+ dev_err(hdptx->dev, "%s can't find pll cfg\n", __func__); -+ return -EINVAL; -+ } -+ } -+ -+ dev_dbg(hdptx->dev, "mdiv=%u, sdiv=%u\n", -+ cfg->pms_mdiv, cfg->pms_sdiv + 1); -+ dev_dbg(hdptx->dev, "sdm_en=%u, k_sign=%u, k=%u, lc=%u", -+ cfg->sdm_en, cfg->sdm_num_sign, cfg->sdm_num, cfg->sdm_deno); -+ dev_dbg(hdptx->dev, "n=%u, k_sub=%u, lc_sub=%u\n", -+ cfg->sdc_n + 3, cfg->sdc_num, cfg->sdc_deno); -+ -+ hdptx_pre_power_up(hdptx); -+ -+ reset_control_assert(hdptx->rsts[RST_ROPLL].rstc); -+ udelay(20); -+ reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc); -+ -+ hdptx_write(hdptx, CMN_REG0008, 0x00); -+ hdptx_write(hdptx, CMN_REG0009, 0x0c); -+ hdptx_write(hdptx, CMN_REG000A, 0x83); -+ hdptx_write(hdptx, CMN_REG000B, 0x06); -+ hdptx_write(hdptx, CMN_REG000C, 0x20); -+ hdptx_write(hdptx, CMN_REG000D, 0xb8); -+ hdptx_write(hdptx, CMN_REG000E, 0x0f); -+ hdptx_write(hdptx, CMN_REG000F, 0x0f); -+ hdptx_write(hdptx, CMN_REG0010, 0x04); -+ hdptx_write(hdptx, CMN_REG0011, 0x01); -+ hdptx_write(hdptx, CMN_REG0012, 0x26); -+ hdptx_write(hdptx, CMN_REG0013, 0x22); -+ hdptx_write(hdptx, CMN_REG0014, 0x24); -+ hdptx_write(hdptx, CMN_REG0015, 0x77); -+ hdptx_write(hdptx, CMN_REG0016, 0x08); -+ hdptx_write(hdptx, CMN_REG0017, 0x20); -+ hdptx_write(hdptx, CMN_REG0018, 0x04); -+ hdptx_write(hdptx, CMN_REG0019, 0x48); -+ hdptx_write(hdptx, CMN_REG001A, 0x01); -+ hdptx_write(hdptx, CMN_REG001B, 0x00); -+ hdptx_write(hdptx, CMN_REG001C, 0x01); -+ hdptx_write(hdptx, CMN_REG001D, 0x64); -+ hdptx_write(hdptx, CMN_REG001E, 0x14); -+ hdptx_write(hdptx, CMN_REG001F, 0x00); -+ hdptx_write(hdptx, CMN_REG0020, 0x00); -+ hdptx_write(hdptx, CMN_REG0021, 0x00); -+ hdptx_write(hdptx, CMN_REG0022, 0x11); -+ hdptx_write(hdptx, CMN_REG0023, 0x00); -+ hdptx_write(hdptx, CMN_REG0024, 0x00); -+ hdptx_write(hdptx, CMN_REG0025, 0x53); -+ hdptx_write(hdptx, CMN_REG0026, 0x00); -+ hdptx_write(hdptx, CMN_REG0027, 0x00); -+ hdptx_write(hdptx, CMN_REG0028, 0x01); -+ hdptx_write(hdptx, CMN_REG0029, 0x01); -+ hdptx_write(hdptx, CMN_REG002A, 0x00); -+ hdptx_write(hdptx, CMN_REG002B, 0x00); -+ hdptx_write(hdptx, CMN_REG002C, 0x00); -+ hdptx_write(hdptx, CMN_REG002D, 0x00); -+ hdptx_write(hdptx, CMN_REG002E, 0x04); -+ hdptx_write(hdptx, CMN_REG002F, 0x00); -+ hdptx_write(hdptx, CMN_REG0030, 0x20); -+ hdptx_write(hdptx, CMN_REG0031, 0x30); -+ hdptx_write(hdptx, CMN_REG0032, 0x0b); -+ hdptx_write(hdptx, CMN_REG0033, 0x23); -+ hdptx_write(hdptx, CMN_REG0034, 0x00); -+ hdptx_write(hdptx, CMN_REG0035, 0x00); -+ hdptx_write(hdptx, CMN_REG0038, 0x00); -+ hdptx_write(hdptx, CMN_REG0039, 0x00); -+ hdptx_write(hdptx, CMN_REG003A, 0x00); -+ hdptx_write(hdptx, CMN_REG003B, 0x00); -+ hdptx_write(hdptx, CMN_REG003C, 0x80); -+ hdptx_write(hdptx, CMN_REG003D, 0x40); -+ hdptx_write(hdptx, CMN_REG003E, 0x0c); -+ hdptx_write(hdptx, CMN_REG003F, 0x83); -+ hdptx_write(hdptx, CMN_REG0040, 0x06); -+ hdptx_write(hdptx, CMN_REG0041, 0x20); -+ hdptx_write(hdptx, CMN_REG0042, 0x78); -+ hdptx_write(hdptx, CMN_REG0043, 0x00); -+ hdptx_write(hdptx, CMN_REG0044, 0x46); -+ hdptx_write(hdptx, CMN_REG0045, 0x24); -+ hdptx_write(hdptx, CMN_REG0046, 
0xff); -+ hdptx_write(hdptx, CMN_REG0047, 0x00); -+ hdptx_write(hdptx, CMN_REG0048, 0x44); -+ hdptx_write(hdptx, CMN_REG0049, 0xfa); -+ hdptx_write(hdptx, CMN_REG004A, 0x08); -+ hdptx_write(hdptx, CMN_REG004B, 0x00); -+ hdptx_write(hdptx, CMN_REG004C, 0x01); -+ hdptx_write(hdptx, CMN_REG004D, 0x64); -+ hdptx_write(hdptx, CMN_REG004E, 0x34); -+ hdptx_write(hdptx, CMN_REG004F, 0x00); -+ hdptx_write(hdptx, CMN_REG0050, 0x00); -+ -+ hdptx_write(hdptx, CMN_REG0051, cfg->pms_mdiv); -+ hdptx_write(hdptx, CMN_REG0055, cfg->pms_mdiv_afc); -+ -+ hdptx_write(hdptx, CMN_REG0059, (cfg->pms_pdiv << 4) | cfg->pms_refdiv); -+ -+ hdptx_write(hdptx, CMN_REG005A, (cfg->pms_sdiv << 4)); -+ -+ hdptx_write(hdptx, CMN_REG005C, 0x25); -+ hdptx_write(hdptx, CMN_REG005D, 0x0c); -+ hdptx_write(hdptx, CMN_REG005E, 0x4f); -+ hdptx_update_bits(hdptx, CMN_REG005E, ROPLL_SDM_EN_MASK, -+ ROPLL_SDM_EN(cfg->sdm_en)); -+ if (!cfg->sdm_en) -+ hdptx_update_bits(hdptx, CMN_REG005E, 0xf, 0); -+ -+ hdptx_write(hdptx, CMN_REG005F, 0x01); -+ -+ hdptx_update_bits(hdptx, CMN_REG0064, ROPLL_SDM_NUM_SIGN_RBR_MASK, -+ ROPLL_SDM_NUM_SIGN_RBR(cfg->sdm_num_sign)); -+ hdptx_write(hdptx, CMN_REG0065, cfg->sdm_num); -+ hdptx_write(hdptx, CMN_REG0060, cfg->sdm_deno); -+ -+ hdptx_update_bits(hdptx, CMN_REG0069, ROPLL_SDC_N_RBR_MASK, -+ ROPLL_SDC_N_RBR(cfg->sdc_n)); -+ -+ hdptx_write(hdptx, CMN_REG006C, cfg->sdc_num); -+ hdptx_write(hdptx, CMN_REG0070, cfg->sdc_deno); -+ -+ hdptx_write(hdptx, CMN_REG006B, 0x04); -+ -+ hdptx_write(hdptx, CMN_REG0073, 0x30); -+ hdptx_write(hdptx, CMN_REG0074, 0x04); -+ hdptx_write(hdptx, CMN_REG0075, 0x20); -+ hdptx_write(hdptx, CMN_REG0076, 0x30); -+ hdptx_write(hdptx, CMN_REG0077, 0x08); -+ hdptx_write(hdptx, CMN_REG0078, 0x0c); -+ hdptx_write(hdptx, CMN_REG0079, 0x00); -+ hdptx_write(hdptx, CMN_REG007B, 0x00); -+ hdptx_write(hdptx, CMN_REG007C, 0x00); -+ hdptx_write(hdptx, CMN_REG007D, 0x00); -+ hdptx_write(hdptx, CMN_REG007E, 0x00); -+ hdptx_write(hdptx, CMN_REG007F, 0x00); -+ hdptx_write(hdptx, CMN_REG0080, 0x00); -+ hdptx_write(hdptx, CMN_REG0081, 0x01); -+ hdptx_write(hdptx, CMN_REG0082, 0x04); -+ hdptx_write(hdptx, CMN_REG0083, 0x24); -+ hdptx_write(hdptx, CMN_REG0084, 0x20); -+ hdptx_write(hdptx, CMN_REG0085, 0x03); -+ -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_POSTDIV_SEL_MASK, -+ PLL_PCG_POSTDIV_SEL(cfg->pms_sdiv)); -+ -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_CLK_SEL_MASK, -+ PLL_PCG_CLK_SEL(color_depth)); -+ -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_CLK_EN, PLL_PCG_CLK_EN); -+ -+ hdptx_write(hdptx, CMN_REG0087, 0x04); -+ hdptx_write(hdptx, CMN_REG0089, 0x00); -+ hdptx_write(hdptx, CMN_REG008A, 0x55); -+ hdptx_write(hdptx, CMN_REG008B, 0x25); -+ hdptx_write(hdptx, CMN_REG008C, 0x2c); -+ hdptx_write(hdptx, CMN_REG008D, 0x22); -+ hdptx_write(hdptx, CMN_REG008E, 0x14); -+ hdptx_write(hdptx, CMN_REG008F, 0x20); -+ hdptx_write(hdptx, CMN_REG0090, 0x00); -+ hdptx_write(hdptx, CMN_REG0091, 0x00); -+ hdptx_write(hdptx, CMN_REG0092, 0x00); -+ hdptx_write(hdptx, CMN_REG0093, 0x00); -+ hdptx_write(hdptx, CMN_REG0095, 0x00); -+ hdptx_write(hdptx, CMN_REG0097, 0x02); -+ hdptx_write(hdptx, CMN_REG0099, 0x04); -+ hdptx_write(hdptx, CMN_REG009A, 0x11); -+ hdptx_write(hdptx, CMN_REG009B, 0x00); -+ -+ return hdptx_post_enable_pll(hdptx); -+} -+ -+static int hdptx_ropll_tmds_mode_config(struct rockchip_hdptx_phy *hdptx, u32 rate) -+{ -+ u32 bit_rate = rate & DATA_RATE_MASK; -+ -+ if (!(hdptx_grf_read(hdptx, GRF_HDPTX_STATUS) & HDPTX_O_PLL_LOCK_DONE)) { -+ int ret; -+ -+ ret = 
hdptx_ropll_cmn_config(hdptx, bit_rate); -+ if (ret) -+ return ret; -+ } -+ -+ hdptx_write(hdptx, SB_REG0114, 0x00); -+ hdptx_write(hdptx, SB_REG0115, 0x00); -+ hdptx_write(hdptx, SB_REG0116, 0x00); -+ hdptx_write(hdptx, SB_REG0117, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0200, 0x06); -+ -+ if (bit_rate >= 3400000) { -+ /* For 1/40 bitrate clk */ -+ hdptx_write(hdptx, LNTOP_REG0201, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0202, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0203, 0x0f); -+ hdptx_write(hdptx, LNTOP_REG0204, 0xff); -+ hdptx_write(hdptx, LNTOP_REG0205, 0xff); -+ } else { -+ /* For 1/10 bitrate clk */ -+ hdptx_write(hdptx, LNTOP_REG0201, 0x07); -+ hdptx_write(hdptx, LNTOP_REG0202, 0xc1); -+ hdptx_write(hdptx, LNTOP_REG0203, 0xf0); -+ hdptx_write(hdptx, LNTOP_REG0204, 0x7c); -+ hdptx_write(hdptx, LNTOP_REG0205, 0x1f); -+ } -+ -+ hdptx_write(hdptx, LNTOP_REG0206, 0x07); -+ hdptx_write(hdptx, LNTOP_REG0207, 0x0f); -+ hdptx_write(hdptx, LANE_REG0303, 0x0c); -+ hdptx_write(hdptx, LANE_REG0307, 0x20); -+ hdptx_write(hdptx, LANE_REG030A, 0x17); -+ hdptx_write(hdptx, LANE_REG030B, 0x77); -+ hdptx_write(hdptx, LANE_REG030C, 0x77); -+ hdptx_write(hdptx, LANE_REG030D, 0x77); -+ hdptx_write(hdptx, LANE_REG030E, 0x38); -+ hdptx_write(hdptx, LANE_REG0310, 0x03); -+ hdptx_write(hdptx, LANE_REG0311, 0x0f); -+ hdptx_write(hdptx, LANE_REG0312, 0x00); -+ hdptx_write(hdptx, LANE_REG0316, 0x02); -+ hdptx_write(hdptx, LANE_REG031B, 0x01); -+ hdptx_write(hdptx, LANE_REG031E, 0x00); -+ hdptx_write(hdptx, LANE_REG031F, 0x15); -+ hdptx_write(hdptx, LANE_REG0320, 0xa0); -+ hdptx_write(hdptx, LANE_REG0403, 0x0c); -+ hdptx_write(hdptx, LANE_REG0407, 0x20); -+ hdptx_write(hdptx, LANE_REG040A, 0x17); -+ hdptx_write(hdptx, LANE_REG040B, 0x77); -+ hdptx_write(hdptx, LANE_REG040C, 0x77); -+ hdptx_write(hdptx, LANE_REG040D, 0x77); -+ hdptx_write(hdptx, LANE_REG040E, 0x38); -+ hdptx_write(hdptx, LANE_REG0410, 0x03); -+ hdptx_write(hdptx, LANE_REG0411, 0x0f); -+ hdptx_write(hdptx, LANE_REG0412, 0x00); -+ hdptx_write(hdptx, LANE_REG0416, 0x02); -+ hdptx_write(hdptx, LANE_REG041B, 0x01); -+ hdptx_write(hdptx, LANE_REG041E, 0x00); -+ hdptx_write(hdptx, LANE_REG041F, 0x15); -+ hdptx_write(hdptx, LANE_REG0420, 0xa0); -+ hdptx_write(hdptx, LANE_REG0503, 0x0c); -+ hdptx_write(hdptx, LANE_REG0507, 0x20); -+ hdptx_write(hdptx, LANE_REG050A, 0x17); -+ hdptx_write(hdptx, LANE_REG050B, 0x77); -+ hdptx_write(hdptx, LANE_REG050C, 0x77); -+ hdptx_write(hdptx, LANE_REG050D, 0x77); -+ hdptx_write(hdptx, LANE_REG050E, 0x38); -+ hdptx_write(hdptx, LANE_REG0510, 0x03); -+ hdptx_write(hdptx, LANE_REG0511, 0x0f); -+ hdptx_write(hdptx, LANE_REG0512, 0x00); -+ hdptx_write(hdptx, LANE_REG0516, 0x02); -+ hdptx_write(hdptx, LANE_REG051B, 0x01); -+ hdptx_write(hdptx, LANE_REG051E, 0x00); -+ hdptx_write(hdptx, LANE_REG051F, 0x15); -+ hdptx_write(hdptx, LANE_REG0520, 0xa0); -+ hdptx_write(hdptx, LANE_REG0603, 0x0c); -+ hdptx_write(hdptx, LANE_REG0607, 0x20); -+ hdptx_write(hdptx, LANE_REG060A, 0x17); -+ hdptx_write(hdptx, LANE_REG060B, 0x77); -+ hdptx_write(hdptx, LANE_REG060C, 0x77); -+ hdptx_write(hdptx, LANE_REG060D, 0x77); -+ hdptx_write(hdptx, LANE_REG060E, 0x38); -+ hdptx_write(hdptx, LANE_REG0610, 0x03); -+ hdptx_write(hdptx, LANE_REG0611, 0x0f); -+ hdptx_write(hdptx, LANE_REG0612, 0x00); -+ hdptx_write(hdptx, LANE_REG0616, 0x02); -+ hdptx_write(hdptx, LANE_REG061B, 0x01); -+ hdptx_write(hdptx, LANE_REG061E, 0x08); -+ hdptx_write(hdptx, LANE_REG061F, 0x15); -+ hdptx_write(hdptx, LANE_REG0620, 0xa0); -+ -+ hdptx_write(hdptx, LANE_REG0303, 0x2f); -+ 
hdptx_write(hdptx, LANE_REG0403, 0x2f); -+ hdptx_write(hdptx, LANE_REG0503, 0x2f); -+ hdptx_write(hdptx, LANE_REG0603, 0x2f); -+ hdptx_write(hdptx, LANE_REG0305, 0x03); -+ hdptx_write(hdptx, LANE_REG0405, 0x03); -+ hdptx_write(hdptx, LANE_REG0505, 0x03); -+ hdptx_write(hdptx, LANE_REG0605, 0x03); -+ hdptx_write(hdptx, LANE_REG0306, 0x1c); -+ hdptx_write(hdptx, LANE_REG0406, 0x1c); -+ hdptx_write(hdptx, LANE_REG0506, 0x1c); -+ hdptx_write(hdptx, LANE_REG0606, 0x1c); -+ -+ if (hdptx->earc_en) -+ hdptx_earc_config(hdptx); -+ -+ return hdptx_post_enable_lane(hdptx); -+} -+ -+static int hdptx_ropll_frl_mode_config(struct rockchip_hdptx_phy *hdptx, u32 rate) -+{ -+ u32 bit_rate = rate & DATA_RATE_MASK; -+ u8 color_depth = (rate & COLOR_DEPTH_MASK) ? 1 : 0; -+ const struct ropll_config *cfg = ropll_frl_cfg; -+ -+ for (; cfg->bit_rate != ~0; cfg++) -+ if (bit_rate == cfg->bit_rate) -+ break; -+ -+ if (cfg->bit_rate == ~0) { -+ dev_err(hdptx->dev, "%s can't find pll cfg\n", __func__); -+ return -EINVAL; -+ } -+ -+ hdptx_pre_power_up(hdptx); -+ -+ reset_control_assert(hdptx->rsts[RST_ROPLL].rstc); -+ usleep_range(10, 20); -+ reset_control_deassert(hdptx->rsts[RST_ROPLL].rstc); -+ -+ hdptx_write(hdptx, CMN_REG0008, 0x00); -+ hdptx_write(hdptx, CMN_REG0009, 0x0c); -+ hdptx_write(hdptx, CMN_REG000A, 0x83); -+ hdptx_write(hdptx, CMN_REG000B, 0x06); -+ hdptx_write(hdptx, CMN_REG000C, 0x20); -+ hdptx_write(hdptx, CMN_REG000D, 0xb8); -+ hdptx_write(hdptx, CMN_REG000E, 0x0f); -+ hdptx_write(hdptx, CMN_REG000F, 0x0f); -+ hdptx_write(hdptx, CMN_REG0010, 0x04); -+ hdptx_write(hdptx, CMN_REG0011, 0x00); -+ hdptx_write(hdptx, CMN_REG0012, 0x26); -+ hdptx_write(hdptx, CMN_REG0013, 0x22); -+ hdptx_write(hdptx, CMN_REG0014, 0x24); -+ hdptx_write(hdptx, CMN_REG0015, 0x77); -+ hdptx_write(hdptx, CMN_REG0016, 0x08); -+ hdptx_write(hdptx, CMN_REG0017, 0x00); -+ hdptx_write(hdptx, CMN_REG0018, 0x04); -+ hdptx_write(hdptx, CMN_REG0019, 0x48); -+ hdptx_write(hdptx, CMN_REG001A, 0x01); -+ hdptx_write(hdptx, CMN_REG001B, 0x00); -+ hdptx_write(hdptx, CMN_REG001C, 0x01); -+ hdptx_write(hdptx, CMN_REG001D, 0x64); -+ hdptx_write(hdptx, CMN_REG001E, 0x14); -+ hdptx_write(hdptx, CMN_REG001F, 0x00); -+ hdptx_write(hdptx, CMN_REG0020, 0x00); -+ hdptx_write(hdptx, CMN_REG0021, 0x00); -+ hdptx_write(hdptx, CMN_REG0022, 0x11); -+ hdptx_write(hdptx, CMN_REG0023, 0x00); -+ hdptx_write(hdptx, CMN_REG0025, 0x00); -+ hdptx_write(hdptx, CMN_REG0026, 0x53); -+ hdptx_write(hdptx, CMN_REG0027, 0x00); -+ hdptx_write(hdptx, CMN_REG0028, 0x00); -+ hdptx_write(hdptx, CMN_REG0029, 0x01); -+ hdptx_write(hdptx, CMN_REG002A, 0x01); -+ hdptx_write(hdptx, CMN_REG002B, 0x00); -+ hdptx_write(hdptx, CMN_REG002C, 0x00); -+ hdptx_write(hdptx, CMN_REG002D, 0x00); -+ hdptx_write(hdptx, CMN_REG002E, 0x00); -+ hdptx_write(hdptx, CMN_REG002F, 0x04); -+ hdptx_write(hdptx, CMN_REG0030, 0x00); -+ hdptx_write(hdptx, CMN_REG0031, 0x20); -+ hdptx_write(hdptx, CMN_REG0032, 0x30); -+ hdptx_write(hdptx, CMN_REG0033, 0x0b); -+ hdptx_write(hdptx, CMN_REG0034, 0x23); -+ hdptx_write(hdptx, CMN_REG0035, 0x00); -+ hdptx_write(hdptx, CMN_REG0038, 0x00); -+ hdptx_write(hdptx, CMN_REG0039, 0x00); -+ hdptx_write(hdptx, CMN_REG003A, 0x00); -+ hdptx_write(hdptx, CMN_REG003B, 0x00); -+ hdptx_write(hdptx, CMN_REG003C, 0x80); -+ hdptx_write(hdptx, CMN_REG003D, 0x40); -+ hdptx_write(hdptx, CMN_REG003E, 0x0c); -+ hdptx_write(hdptx, CMN_REG003F, 0x83); -+ hdptx_write(hdptx, CMN_REG0040, 0x06); -+ hdptx_write(hdptx, CMN_REG0041, 0x20); -+ hdptx_write(hdptx, CMN_REG0042, 0xb8); -+ 
hdptx_write(hdptx, CMN_REG0043, 0x00); -+ hdptx_write(hdptx, CMN_REG0044, 0x46); -+ hdptx_write(hdptx, CMN_REG0045, 0x24); -+ hdptx_write(hdptx, CMN_REG0046, 0xff); -+ hdptx_write(hdptx, CMN_REG0047, 0x00); -+ hdptx_write(hdptx, CMN_REG0048, 0x44); -+ hdptx_write(hdptx, CMN_REG0049, 0xfa); -+ hdptx_write(hdptx, CMN_REG004A, 0x08); -+ hdptx_write(hdptx, CMN_REG004B, 0x00); -+ hdptx_write(hdptx, CMN_REG004C, 0x01); -+ hdptx_write(hdptx, CMN_REG004D, 0x64); -+ hdptx_write(hdptx, CMN_REG004E, 0x14); -+ hdptx_write(hdptx, CMN_REG004F, 0x00); -+ hdptx_write(hdptx, CMN_REG0050, 0x00); -+ hdptx_write(hdptx, CMN_REG0051, cfg->pms_mdiv); -+ hdptx_write(hdptx, CMN_REG0055, cfg->pms_mdiv_afc); -+ hdptx_write(hdptx, CMN_REG0059, (cfg->pms_pdiv << 4) | cfg->pms_refdiv); -+ hdptx_write(hdptx, CMN_REG005A, (cfg->pms_sdiv << 4)); -+ hdptx_write(hdptx, CMN_REG005C, 0x25); -+ hdptx_write(hdptx, CMN_REG005D, 0x0c); -+ hdptx_update_bits(hdptx, CMN_REG005E, ROPLL_SDM_EN_MASK, -+ ROPLL_SDM_EN(cfg->sdm_en)); -+ if (!cfg->sdm_en) -+ hdptx_update_bits(hdptx, CMN_REG005E, 0xf, 0); -+ hdptx_write(hdptx, CMN_REG005F, 0x01); -+ hdptx_update_bits(hdptx, CMN_REG0064, ROPLL_SDM_NUM_SIGN_RBR_MASK, -+ ROPLL_SDM_NUM_SIGN_RBR(cfg->sdm_num_sign)); -+ hdptx_write(hdptx, CMN_REG0065, cfg->sdm_num); -+ hdptx_write(hdptx, CMN_REG0060, cfg->sdm_deno); -+ hdptx_update_bits(hdptx, CMN_REG0069, ROPLL_SDC_N_RBR_MASK, -+ ROPLL_SDC_N_RBR(cfg->sdc_n)); -+ hdptx_write(hdptx, CMN_REG006C, cfg->sdc_num); -+ hdptx_write(hdptx, CMN_REG0070, cfg->sdc_deno); -+ hdptx_write(hdptx, CMN_REG006B, 0x04); -+ hdptx_write(hdptx, CMN_REG0073, 0x30); -+ hdptx_write(hdptx, CMN_REG0074, 0x00); -+ hdptx_write(hdptx, CMN_REG0075, 0x20); -+ hdptx_write(hdptx, CMN_REG0076, 0x30); -+ hdptx_write(hdptx, CMN_REG0077, 0x08); -+ hdptx_write(hdptx, CMN_REG0078, 0x0c); -+ hdptx_write(hdptx, CMN_REG0079, 0x00); -+ hdptx_write(hdptx, CMN_REG007B, 0x00); -+ hdptx_write(hdptx, CMN_REG007C, 0x00); -+ hdptx_write(hdptx, CMN_REG007D, 0x00); -+ hdptx_write(hdptx, CMN_REG007E, 0x00); -+ hdptx_write(hdptx, CMN_REG007F, 0x00); -+ hdptx_write(hdptx, CMN_REG0080, 0x00); -+ hdptx_write(hdptx, CMN_REG0081, 0x09); -+ hdptx_write(hdptx, CMN_REG0082, 0x04); -+ hdptx_write(hdptx, CMN_REG0083, 0x24); -+ hdptx_write(hdptx, CMN_REG0084, 0x20); -+ hdptx_write(hdptx, CMN_REG0085, 0x03); -+ hdptx_write(hdptx, CMN_REG0086, 0x01); -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_POSTDIV_SEL_MASK, -+ PLL_PCG_POSTDIV_SEL(cfg->pms_sdiv)); -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_CLK_SEL_MASK, -+ PLL_PCG_CLK_SEL(color_depth)); -+ hdptx_write(hdptx, CMN_REG0087, 0x0c); -+ hdptx_write(hdptx, CMN_REG0089, 0x00); -+ hdptx_write(hdptx, CMN_REG008A, 0x55); -+ hdptx_write(hdptx, CMN_REG008B, 0x25); -+ hdptx_write(hdptx, CMN_REG008C, 0x2c); -+ hdptx_write(hdptx, CMN_REG008D, 0x22); -+ hdptx_write(hdptx, CMN_REG008E, 0x14); -+ hdptx_write(hdptx, CMN_REG008F, 0x20); -+ hdptx_write(hdptx, CMN_REG0090, 0x00); -+ hdptx_write(hdptx, CMN_REG0091, 0x00); -+ hdptx_write(hdptx, CMN_REG0092, 0x00); -+ hdptx_write(hdptx, CMN_REG0093, 0x00); -+ hdptx_write(hdptx, CMN_REG0094, 0x00); -+ hdptx_write(hdptx, CMN_REG0097, 0x02); -+ hdptx_write(hdptx, CMN_REG0099, 0x04); -+ hdptx_write(hdptx, CMN_REG009A, 0x11); -+ hdptx_write(hdptx, CMN_REG009B, 0x10); -+ hdptx_write(hdptx, SB_REG0114, 0x00); -+ hdptx_write(hdptx, SB_REG0115, 0x00); -+ hdptx_write(hdptx, SB_REG0116, 0x00); -+ hdptx_write(hdptx, SB_REG0117, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0200, 0x04); -+ hdptx_write(hdptx, LNTOP_REG0201, 0x00); -+ 
hdptx_write(hdptx, LNTOP_REG0202, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0203, 0xf0); -+ hdptx_write(hdptx, LNTOP_REG0204, 0xff); -+ hdptx_write(hdptx, LNTOP_REG0205, 0xff); -+ hdptx_write(hdptx, LNTOP_REG0206, 0x05); -+ hdptx_write(hdptx, LNTOP_REG0207, 0x0f); -+ hdptx_write(hdptx, LANE_REG0303, 0x0c); -+ hdptx_write(hdptx, LANE_REG0307, 0x20); -+ hdptx_write(hdptx, LANE_REG030A, 0x17); -+ hdptx_write(hdptx, LANE_REG030B, 0x77); -+ hdptx_write(hdptx, LANE_REG030C, 0x77); -+ hdptx_write(hdptx, LANE_REG030D, 0x77); -+ hdptx_write(hdptx, LANE_REG030E, 0x38); -+ hdptx_write(hdptx, LANE_REG0310, 0x03); -+ hdptx_write(hdptx, LANE_REG0311, 0x0f); -+ hdptx_write(hdptx, LANE_REG0312, 0x3c); -+ hdptx_write(hdptx, LANE_REG0316, 0x02); -+ hdptx_write(hdptx, LANE_REG031B, 0x01); -+ hdptx_write(hdptx, LANE_REG031F, 0x15); -+ hdptx_write(hdptx, LANE_REG0320, 0xa0); -+ hdptx_write(hdptx, LANE_REG0403, 0x0c); -+ hdptx_write(hdptx, LANE_REG0407, 0x20); -+ hdptx_write(hdptx, LANE_REG040A, 0x17); -+ hdptx_write(hdptx, LANE_REG040B, 0x77); -+ hdptx_write(hdptx, LANE_REG040C, 0x77); -+ hdptx_write(hdptx, LANE_REG040D, 0x77); -+ hdptx_write(hdptx, LANE_REG040E, 0x38); -+ hdptx_write(hdptx, LANE_REG0410, 0x03); -+ hdptx_write(hdptx, LANE_REG0411, 0x0f); -+ hdptx_write(hdptx, LANE_REG0412, 0x3c); -+ hdptx_write(hdptx, LANE_REG0416, 0x02); -+ hdptx_write(hdptx, LANE_REG041B, 0x01); -+ hdptx_write(hdptx, LANE_REG041F, 0x15); -+ hdptx_write(hdptx, LANE_REG0420, 0xa0); -+ hdptx_write(hdptx, LANE_REG0503, 0x0c); -+ hdptx_write(hdptx, LANE_REG0507, 0x20); -+ hdptx_write(hdptx, LANE_REG050A, 0x17); -+ hdptx_write(hdptx, LANE_REG050B, 0x77); -+ hdptx_write(hdptx, LANE_REG050C, 0x77); -+ hdptx_write(hdptx, LANE_REG050D, 0x77); -+ hdptx_write(hdptx, LANE_REG050E, 0x38); -+ hdptx_write(hdptx, LANE_REG0510, 0x03); -+ hdptx_write(hdptx, LANE_REG0511, 0x0f); -+ hdptx_write(hdptx, LANE_REG0512, 0x3c); -+ hdptx_write(hdptx, LANE_REG0516, 0x02); -+ hdptx_write(hdptx, LANE_REG051B, 0x01); -+ hdptx_write(hdptx, LANE_REG051F, 0x15); -+ hdptx_write(hdptx, LANE_REG0520, 0xa0); -+ hdptx_write(hdptx, LANE_REG0603, 0x0c); -+ hdptx_write(hdptx, LANE_REG0607, 0x20); -+ hdptx_write(hdptx, LANE_REG060A, 0x17); -+ hdptx_write(hdptx, LANE_REG060B, 0x77); -+ hdptx_write(hdptx, LANE_REG060C, 0x77); -+ hdptx_write(hdptx, LANE_REG060D, 0x77); -+ hdptx_write(hdptx, LANE_REG060E, 0x38); -+ hdptx_write(hdptx, LANE_REG0610, 0x03); -+ hdptx_write(hdptx, LANE_REG0611, 0x0f); -+ hdptx_write(hdptx, LANE_REG0612, 0x3c); -+ hdptx_write(hdptx, LANE_REG0616, 0x02); -+ hdptx_write(hdptx, LANE_REG061B, 0x01); -+ hdptx_write(hdptx, LANE_REG061F, 0x15); -+ hdptx_write(hdptx, LANE_REG0620, 0xa0); -+ -+ if (hdptx->earc_en) -+ hdptx_earc_config(hdptx); -+ -+ return hdptx_post_power_up(hdptx); -+} -+ -+static int hdptx_lcpll_frl_mode_config(struct rockchip_hdptx_phy *hdptx, u32 rate) -+{ -+ u32 bit_rate = rate & DATA_RATE_MASK; -+ u8 color_depth = (rate & COLOR_DEPTH_MASK) ? 
1 : 0; -+ const struct lcpll_config *cfg = lcpll_cfg; -+ -+ for (; cfg->bit_rate != ~0; cfg++) -+ if (bit_rate == cfg->bit_rate) -+ break; -+ -+ if (cfg->bit_rate == ~0) -+ return -EINVAL; -+ -+ hdptx_pre_power_up(hdptx); -+ -+ hdptx_update_bits(hdptx, CMN_REG0008, LCPLL_EN_MASK | -+ LCPLL_LCVCO_MODE_EN_MASK, LCPLL_EN(1) | -+ LCPLL_LCVCO_MODE_EN(cfg->lcvco_mode_en)); -+ hdptx_write(hdptx, CMN_REG0009, 0x0c); -+ hdptx_write(hdptx, CMN_REG000A, 0x83); -+ hdptx_write(hdptx, CMN_REG000B, 0x06); -+ hdptx_write(hdptx, CMN_REG000C, 0x20); -+ hdptx_write(hdptx, CMN_REG000D, 0xb8); -+ hdptx_write(hdptx, CMN_REG000E, 0x0f); -+ hdptx_write(hdptx, CMN_REG000F, 0x0f); -+ hdptx_write(hdptx, CMN_REG0010, 0x04); -+ hdptx_write(hdptx, CMN_REG0011, 0x00); -+ hdptx_write(hdptx, CMN_REG0012, 0x26); -+ hdptx_write(hdptx, CMN_REG0013, 0x22); -+ hdptx_write(hdptx, CMN_REG0014, 0x24); -+ hdptx_write(hdptx, CMN_REG0015, 0x77); -+ hdptx_write(hdptx, CMN_REG0016, 0x08); -+ hdptx_write(hdptx, CMN_REG0017, 0x00); -+ hdptx_write(hdptx, CMN_REG0018, 0x04); -+ hdptx_write(hdptx, CMN_REG0019, 0x48); -+ hdptx_write(hdptx, CMN_REG001A, 0x01); -+ hdptx_write(hdptx, CMN_REG001B, 0x00); -+ hdptx_write(hdptx, CMN_REG001C, 0x01); -+ hdptx_write(hdptx, CMN_REG001D, 0x64); -+ hdptx_update_bits(hdptx, CMN_REG001E, LCPLL_PI_EN_MASK | -+ LCPLL_100M_CLK_EN_MASK, -+ LCPLL_PI_EN(cfg->pi_en) | -+ LCPLL_100M_CLK_EN(cfg->clk_en_100m)); -+ hdptx_write(hdptx, CMN_REG001F, 0x00); -+ hdptx_write(hdptx, CMN_REG0020, cfg->pms_mdiv); -+ hdptx_write(hdptx, CMN_REG0021, cfg->pms_mdiv_afc); -+ hdptx_write(hdptx, CMN_REG0022, (cfg->pms_pdiv << 4) | cfg->pms_refdiv); -+ hdptx_write(hdptx, CMN_REG0023, (cfg->pms_sdiv << 4) | cfg->pms_sdiv); -+ hdptx_write(hdptx, CMN_REG0025, 0x10); -+ hdptx_write(hdptx, CMN_REG0026, 0x53); -+ hdptx_write(hdptx, CMN_REG0027, 0x01); -+ hdptx_write(hdptx, CMN_REG0028, 0x0d); -+ hdptx_write(hdptx, CMN_REG0029, 0x01); -+ hdptx_write(hdptx, CMN_REG002A, cfg->sdm_deno); -+ hdptx_write(hdptx, CMN_REG002B, cfg->sdm_num_sign); -+ hdptx_write(hdptx, CMN_REG002C, cfg->sdm_num); -+ hdptx_update_bits(hdptx, CMN_REG002D, LCPLL_SDC_N_MASK, -+ LCPLL_SDC_N(cfg->sdc_n)); -+ hdptx_write(hdptx, CMN_REG002E, 0x02); -+ hdptx_write(hdptx, CMN_REG002F, 0x0d); -+ hdptx_write(hdptx, CMN_REG0030, 0x00); -+ hdptx_write(hdptx, CMN_REG0031, 0x20); -+ hdptx_write(hdptx, CMN_REG0032, 0x30); -+ hdptx_write(hdptx, CMN_REG0033, 0x0b); -+ hdptx_write(hdptx, CMN_REG0034, 0x23); -+ hdptx_write(hdptx, CMN_REG0035, 0x00); -+ hdptx_write(hdptx, CMN_REG0038, 0x00); -+ hdptx_write(hdptx, CMN_REG0039, 0x00); -+ hdptx_write(hdptx, CMN_REG003A, 0x00); -+ hdptx_write(hdptx, CMN_REG003B, 0x00); -+ hdptx_write(hdptx, CMN_REG003C, 0x80); -+ hdptx_write(hdptx, CMN_REG003D, 0x00); -+ hdptx_write(hdptx, CMN_REG003E, 0x0c); -+ hdptx_write(hdptx, CMN_REG003F, 0x83); -+ hdptx_write(hdptx, CMN_REG0040, 0x06); -+ hdptx_write(hdptx, CMN_REG0041, 0x20); -+ hdptx_write(hdptx, CMN_REG0042, 0xb8); -+ hdptx_write(hdptx, CMN_REG0043, 0x00); -+ hdptx_write(hdptx, CMN_REG0044, 0x46); -+ hdptx_write(hdptx, CMN_REG0045, 0x24); -+ hdptx_write(hdptx, CMN_REG0046, 0xff); -+ hdptx_write(hdptx, CMN_REG0047, 0x00); -+ hdptx_write(hdptx, CMN_REG0048, 0x44); -+ hdptx_write(hdptx, CMN_REG0049, 0xfa); -+ hdptx_write(hdptx, CMN_REG004A, 0x08); -+ hdptx_write(hdptx, CMN_REG004B, 0x00); -+ hdptx_write(hdptx, CMN_REG004C, 0x01); -+ hdptx_write(hdptx, CMN_REG004D, 0x64); -+ hdptx_write(hdptx, CMN_REG004E, 0x14); -+ hdptx_write(hdptx, CMN_REG004F, 0x00); -+ hdptx_write(hdptx, CMN_REG0050, 0x00); -+ 
hdptx_write(hdptx, CMN_REG0051, 0x00); -+ hdptx_write(hdptx, CMN_REG0055, 0x00); -+ hdptx_write(hdptx, CMN_REG0059, 0x11); -+ hdptx_write(hdptx, CMN_REG005A, 0x03); -+ hdptx_write(hdptx, CMN_REG005C, 0x05); -+ hdptx_write(hdptx, CMN_REG005D, 0x0c); -+ hdptx_write(hdptx, CMN_REG005E, 0x07); -+ hdptx_write(hdptx, CMN_REG005F, 0x01); -+ hdptx_write(hdptx, CMN_REG0060, 0x01); -+ hdptx_write(hdptx, CMN_REG0064, 0x07); -+ hdptx_write(hdptx, CMN_REG0065, 0x00); -+ hdptx_write(hdptx, CMN_REG0069, 0x00); -+ hdptx_write(hdptx, CMN_REG006B, 0x04); -+ hdptx_write(hdptx, CMN_REG006C, 0x00); -+ hdptx_write(hdptx, CMN_REG0070, 0x01); -+ hdptx_write(hdptx, CMN_REG0073, 0x30); -+ hdptx_write(hdptx, CMN_REG0074, 0x00); -+ hdptx_write(hdptx, CMN_REG0075, 0x20); -+ hdptx_write(hdptx, CMN_REG0076, 0x30); -+ hdptx_write(hdptx, CMN_REG0077, 0x08); -+ hdptx_write(hdptx, CMN_REG0078, 0x0c); -+ hdptx_write(hdptx, CMN_REG0079, 0x00); -+ hdptx_write(hdptx, CMN_REG007B, 0x00); -+ hdptx_write(hdptx, CMN_REG007C, 0x00); -+ hdptx_write(hdptx, CMN_REG007D, 0x00); -+ hdptx_write(hdptx, CMN_REG007E, 0x00); -+ hdptx_write(hdptx, CMN_REG007F, 0x00); -+ hdptx_write(hdptx, CMN_REG0080, 0x00); -+ hdptx_write(hdptx, CMN_REG0081, 0x09); -+ hdptx_write(hdptx, CMN_REG0082, 0x04); -+ hdptx_write(hdptx, CMN_REG0083, 0x24); -+ hdptx_write(hdptx, CMN_REG0084, 0x20); -+ hdptx_write(hdptx, CMN_REG0085, 0x03); -+ hdptx_write(hdptx, CMN_REG0086, 0x01); -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_POSTDIV_SEL_MASK, -+ PLL_PCG_POSTDIV_SEL(cfg->pms_sdiv)); -+ hdptx_update_bits(hdptx, CMN_REG0086, PLL_PCG_CLK_SEL_MASK, -+ PLL_PCG_CLK_SEL(color_depth)); -+ hdptx_write(hdptx, CMN_REG0087, 0x0c); -+ hdptx_write(hdptx, CMN_REG0089, 0x02); -+ hdptx_write(hdptx, CMN_REG008A, 0x55); -+ hdptx_write(hdptx, CMN_REG008B, 0x25); -+ hdptx_write(hdptx, CMN_REG008C, 0x2c); -+ hdptx_write(hdptx, CMN_REG008D, 0x22); -+ hdptx_write(hdptx, CMN_REG008E, 0x14); -+ hdptx_write(hdptx, CMN_REG008F, 0x20); -+ hdptx_write(hdptx, CMN_REG0090, 0x00); -+ hdptx_write(hdptx, CMN_REG0091, 0x00); -+ hdptx_write(hdptx, CMN_REG0092, 0x00); -+ hdptx_write(hdptx, CMN_REG0093, 0x00); -+ hdptx_write(hdptx, CMN_REG0095, 0x00); -+ hdptx_write(hdptx, CMN_REG0097, 0x00); -+ hdptx_write(hdptx, CMN_REG0099, 0x00); -+ hdptx_write(hdptx, CMN_REG009A, 0x11); -+ hdptx_write(hdptx, CMN_REG009B, 0x10); -+ hdptx_write(hdptx, SB_REG0114, 0x00); -+ hdptx_write(hdptx, SB_REG0115, 0x00); -+ hdptx_write(hdptx, SB_REG0116, 0x00); -+ hdptx_write(hdptx, SB_REG0117, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0200, 0x04); -+ hdptx_write(hdptx, LNTOP_REG0201, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0202, 0x00); -+ hdptx_write(hdptx, LNTOP_REG0203, 0xf0); -+ hdptx_write(hdptx, LNTOP_REG0204, 0xff); -+ hdptx_write(hdptx, LNTOP_REG0205, 0xff); -+ hdptx_write(hdptx, LNTOP_REG0206, 0x05); -+ hdptx_write(hdptx, LNTOP_REG0207, 0x0f); -+ hdptx_write(hdptx, LANE_REG0303, 0x0c); -+ hdptx_write(hdptx, LANE_REG0307, 0x20); -+ hdptx_write(hdptx, LANE_REG030A, 0x17); -+ hdptx_write(hdptx, LANE_REG030B, 0x77); -+ hdptx_write(hdptx, LANE_REG030C, 0x77); -+ hdptx_write(hdptx, LANE_REG030D, 0x77); -+ hdptx_write(hdptx, LANE_REG030E, 0x38); -+ hdptx_write(hdptx, LANE_REG0310, 0x03); -+ hdptx_write(hdptx, LANE_REG0311, 0x0f); -+ hdptx_write(hdptx, LANE_REG0312, 0x3c); -+ hdptx_write(hdptx, LANE_REG0316, 0x02); -+ hdptx_write(hdptx, LANE_REG031B, 0x01); -+ hdptx_write(hdptx, LANE_REG031F, 0x15); -+ hdptx_write(hdptx, LANE_REG0320, 0xa0); -+ hdptx_write(hdptx, LANE_REG0403, 0x0c); -+ hdptx_write(hdptx, LANE_REG0407, 0x20); -+ 
hdptx_write(hdptx, LANE_REG040A, 0x17); -+ hdptx_write(hdptx, LANE_REG040B, 0x77); -+ hdptx_write(hdptx, LANE_REG040C, 0x77); -+ hdptx_write(hdptx, LANE_REG040D, 0x77); -+ hdptx_write(hdptx, LANE_REG040E, 0x38); -+ hdptx_write(hdptx, LANE_REG0410, 0x03); -+ hdptx_write(hdptx, LANE_REG0411, 0x0f); -+ hdptx_write(hdptx, LANE_REG0412, 0x3c); -+ hdptx_write(hdptx, LANE_REG0416, 0x02); -+ hdptx_write(hdptx, LANE_REG041B, 0x01); -+ hdptx_write(hdptx, LANE_REG041F, 0x15); -+ hdptx_write(hdptx, LANE_REG0420, 0xa0); -+ hdptx_write(hdptx, LANE_REG0503, 0x0c); -+ hdptx_write(hdptx, LANE_REG0507, 0x20); -+ hdptx_write(hdptx, LANE_REG050A, 0x17); -+ hdptx_write(hdptx, LANE_REG050B, 0x77); -+ hdptx_write(hdptx, LANE_REG050C, 0x77); -+ hdptx_write(hdptx, LANE_REG050D, 0x77); -+ hdptx_write(hdptx, LANE_REG050E, 0x38); -+ hdptx_write(hdptx, LANE_REG0510, 0x03); -+ hdptx_write(hdptx, LANE_REG0511, 0x0f); -+ hdptx_write(hdptx, LANE_REG0512, 0x3c); -+ hdptx_write(hdptx, LANE_REG0516, 0x02); -+ hdptx_write(hdptx, LANE_REG051B, 0x01); -+ hdptx_write(hdptx, LANE_REG051F, 0x15); -+ hdptx_write(hdptx, LANE_REG0520, 0xa0); -+ hdptx_write(hdptx, LANE_REG0603, 0x0c); -+ hdptx_write(hdptx, LANE_REG0607, 0x20); -+ hdptx_write(hdptx, LANE_REG060A, 0x17); -+ hdptx_write(hdptx, LANE_REG060B, 0x77); -+ hdptx_write(hdptx, LANE_REG060C, 0x77); -+ hdptx_write(hdptx, LANE_REG060D, 0x77); -+ hdptx_write(hdptx, LANE_REG060E, 0x38); -+ hdptx_write(hdptx, LANE_REG0610, 0x03); -+ hdptx_write(hdptx, LANE_REG0611, 0x0f); -+ hdptx_write(hdptx, LANE_REG0612, 0x3c); -+ hdptx_write(hdptx, LANE_REG0616, 0x02); -+ hdptx_write(hdptx, LANE_REG061B, 0x01); -+ hdptx_write(hdptx, LANE_REG061F, 0x15); -+ hdptx_write(hdptx, LANE_REG0620, 0xa0); -+ -+ hdptx_write(hdptx, LANE_REG0303, 0x2f); -+ hdptx_write(hdptx, LANE_REG0403, 0x2f); -+ hdptx_write(hdptx, LANE_REG0503, 0x2f); -+ hdptx_write(hdptx, LANE_REG0603, 0x2f); -+ hdptx_write(hdptx, LANE_REG0305, 0x03); -+ hdptx_write(hdptx, LANE_REG0405, 0x03); -+ hdptx_write(hdptx, LANE_REG0505, 0x03); -+ hdptx_write(hdptx, LANE_REG0605, 0x03); -+ hdptx_write(hdptx, LANE_REG0306, 0xfc); -+ hdptx_write(hdptx, LANE_REG0406, 0xfc); -+ hdptx_write(hdptx, LANE_REG0506, 0xfc); -+ hdptx_write(hdptx, LANE_REG0606, 0xfc); -+ -+ hdptx_write(hdptx, LANE_REG0305, 0x4f); -+ hdptx_write(hdptx, LANE_REG0405, 0x4f); -+ hdptx_write(hdptx, LANE_REG0505, 0x4f); -+ hdptx_write(hdptx, LANE_REG0605, 0x4f); -+ hdptx_write(hdptx, LANE_REG0304, 0x14); -+ hdptx_write(hdptx, LANE_REG0404, 0x14); -+ hdptx_write(hdptx, LANE_REG0504, 0x14); -+ hdptx_write(hdptx, LANE_REG0604, 0x14); -+ -+ if (hdptx->earc_en) -+ hdptx_earc_config(hdptx); -+ -+ return hdptx_post_power_up(hdptx); -+} -+ -+static int rockchip_hdptx_phy_power_on(struct phy *phy) -+{ -+ struct rockchip_hdptx_phy *hdptx = phy_get_drvdata(phy); -+ int bus_width = phy_get_bus_width(hdptx->phy); -+ int bit_rate = bus_width & DATA_RATE_MASK; -+ int ret; -+ -+ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); -+ if (ret) { -+ dev_err(hdptx->dev, "failed to enable clocks\n"); -+ return ret; -+ } -+ -+ dev_dbg(hdptx->dev, "%s bus_width:%x rate:%d\n", -+ __func__, bus_width, bit_rate); -+ -+ if (bus_width & HDMI_EARC_MASK) -+ hdptx->earc_en = true; -+ else -+ hdptx->earc_en = false; -+ -+ if (bus_width & HDMI_MODE_MASK) { -+ if (bit_rate > 24000000) -+ return hdptx_lcpll_frl_mode_config(hdptx, bus_width); -+ -+ return hdptx_ropll_frl_mode_config(hdptx, bus_width); -+ } -+ -+ return hdptx_ropll_tmds_mode_config(hdptx, bus_width); -+} -+ -+static int 
rockchip_hdptx_phy_power_off(struct phy *phy) -+{ -+ struct rockchip_hdptx_phy *hdptx = phy_get_drvdata(phy); -+ -+ if (!(hdptx_grf_read(hdptx, GRF_HDPTX_STATUS) & HDPTX_O_PLL_LOCK_DONE)) -+ return 0; -+ -+ hdptx_phy_disable(hdptx); -+ clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks); -+ -+ return 0; -+} -+ -+static const struct phy_ops rockchip_hdptx_phy_ops = { -+ .owner = THIS_MODULE, -+ .power_on = rockchip_hdptx_phy_power_on, -+ .power_off = rockchip_hdptx_phy_power_off, -+}; -+ -+static void rockchip_hdptx_phy_runtime_disable(void *data) -+{ -+ struct rockchip_hdptx_phy *hdptx = data; -+ -+ clk_bulk_unprepare(hdptx->nr_clks, hdptx->clks); -+ pm_runtime_disable(hdptx->dev); -+} -+ -+static int rockchip_hdptx_phy_probe(struct platform_device *pdev) -+{ -+ struct device *dev = &pdev->dev; -+ struct device_node *np = dev->of_node; -+ struct rockchip_hdptx_phy *hdptx; -+ struct phy_provider *phy_provider; -+ void __iomem *regs; -+ int ret; -+ -+ hdptx = devm_kzalloc(dev, sizeof(*hdptx), GFP_KERNEL); -+ if (!hdptx) -+ return -ENOMEM; -+ -+ hdptx->dev = dev; -+ -+ regs = devm_platform_ioremap_resource(pdev, 0); -+ if (IS_ERR(regs)) -+ return dev_err_probe(dev, PTR_ERR(regs), -+ "Failed to ioremap resource\n"); -+ -+ ret = devm_clk_bulk_get_all(dev, &hdptx->clks); -+ if (ret < 0) -+ return dev_err_probe(dev, ret, "failed to get clocks\n"); -+ if (ret == 0) -+ return dev_err_probe(dev, -EINVAL, "missing clocks\n"); -+ -+ hdptx->nr_clks = ret; -+ -+ hdptx->regmap = devm_regmap_init_mmio(dev, regs, -+ &rockchip_hdptx_phy_regmap_config); -+ if (IS_ERR(hdptx->regmap)) -+ return dev_err_probe(dev, PTR_ERR(hdptx->regmap), -+ "failed to init regmap\n"); -+ -+ hdptx->rsts[RST_PHY].id = "phy"; -+ hdptx->rsts[RST_APB].id = "apb"; -+ hdptx->rsts[RST_INIT].id = "init"; -+ hdptx->rsts[RST_CMN].id = "cmn"; -+ hdptx->rsts[RST_LANE].id = "lane"; -+ hdptx->rsts[RST_ROPLL].id = "ropll"; -+ hdptx->rsts[RST_LCPLL].id = "lcpll"; -+ -+ ret = devm_reset_control_bulk_get_exclusive(dev, RST_MAX, hdptx->rsts); -+ if (ret) -+ return dev_err_probe(dev, ret, "failed to get resets\n"); -+ -+ hdptx->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf"); -+ if (IS_ERR(hdptx->grf)) -+ return dev_err_probe(dev, PTR_ERR(hdptx->grf), -+ "Unable to get rockchip,grf\n"); -+ -+ hdptx->phy = devm_phy_create(dev, NULL, &rockchip_hdptx_phy_ops); -+ if (IS_ERR(hdptx->phy)) -+ return dev_err_probe(dev, PTR_ERR(hdptx->phy), -+ "failed to create HDMI PHY\n"); -+ -+ phy_set_drvdata(hdptx->phy, hdptx); -+ phy_set_bus_width(hdptx->phy, 8); -+ -+ pm_runtime_enable(dev); -+ ret = devm_add_action_or_reset(dev, rockchip_hdptx_phy_runtime_disable, -+ hdptx); -+ if (ret) -+ return ret; -+ -+ phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); -+ if (IS_ERR(phy_provider)) -+ return dev_err_probe(dev, PTR_ERR(phy_provider), -+ "failed to register PHY provider\n"); -+ -+ reset_control_deassert(hdptx->rsts[RST_APB].rstc); -+ reset_control_deassert(hdptx->rsts[RST_CMN].rstc); -+ reset_control_deassert(hdptx->rsts[RST_INIT].rstc); -+ -+ platform_set_drvdata(pdev, hdptx); -+ -+ return 0; -+} -+ -+static const struct of_device_id rockchip_hdptx_phy_of_match[] = { -+ { .compatible = "rockchip,rk3588-hdptx-phy", }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, rockchip_hdptx_phy_of_match); -+ -+static struct platform_driver rockchip_hdptx_phy_driver = { -+ .probe = rockchip_hdptx_phy_probe, -+ .driver = { -+ .name = "rockchip-hdptx-phy", -+ .of_match_table = of_match_ptr(rockchip_hdptx_phy_of_match), -+ }, -+}; 
-+module_platform_driver(rockchip_hdptx_phy_driver); -+ -+MODULE_DESCRIPTION("Samsung HDMI/DP Transmitter Combo PHY Driver"); -+MODULE_LICENSE("GPL v2"); --- -2.42.0 - - -From 909c59fc704f474702f1484f63023c6a6c44f3de Mon Sep 17 00:00:00 2001 +From 2e497da451cac6de8b56f236d5a48469bc165735 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 16 Jan 2024 19:27:40 +0200 -Subject: [PATCH 41/81] phy: phy-rockchip-samsung-hdptx-hdmi: Add clock +Subject: [PATCH 54/71] phy: phy-rockchip-samsung-hdptx-hdmi: Add clock provider The HDMI PHY PLL can be used as an alternative dclk source to SoC CRU. @@ -8792,25 +22657,34 @@ support HDMI 2.1 4K@120Hz mode. Signed-off-by: Cristian Ciocaltea --- - .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 153 +++++++++++++++++- - 1 file changed, 149 insertions(+), 4 deletions(-) + .../phy/rockchip/phy-rockchip-samsung-hdptx.c | 155 ++++++++++++++++++ + 1 file changed, 155 insertions(+) diff --git a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c -index eece301c646f..2ff08c8adb66 100644 +index 946c01210ac8..daf2d0b05d8b 100644 --- a/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c +++ b/drivers/phy/rockchip/phy-rockchip-samsung-hdptx.c -@@ -7,6 +7,7 @@ - * Author: Cristian Ciocaltea +@@ -8,6 +8,7 @@ */ + #include #include +#include #include #include #include -@@ -705,6 +706,12 @@ struct rockchip_hdptx_phy { +@@ -190,6 +191,8 @@ + #define LN3_TX_SER_RATE_SEL_HBR2 BIT(3) + #define LN3_TX_SER_RATE_SEL_HBR3 BIT(2) + ++#define HDMI20_MAX_RATE 600000000 ++ + struct lcpll_config { + u32 bit_rate; + u8 lcvco_mode_en; +@@ -272,6 +275,12 @@ struct rk_hdptx_phy { + struct clk_bulk_data *clks; int nr_clks; struct reset_control_bulk_data rsts[RST_MAX]; - bool earc_en; + + /* clk provider */ + struct clk_hw hw; @@ -8819,61 +22693,35 @@ index eece301c646f..2ff08c8adb66 100644 + int count; }; - static const struct lcpll_config lcpll_cfg[] = { -@@ -801,6 +808,11 @@ static const struct regmap_config rockchip_hdptx_phy_regmap_config = { - .max_register = 0x18b4, - }; + static const struct ropll_config ropll_tmds_cfg[] = { +@@ -566,6 +575,11 @@ static bool rk_hdptx_phy_is_rw_reg(struct device *dev, unsigned int reg) + return false; + } -+static struct rockchip_hdptx_phy *to_rockchip_hdptx_phy(struct clk_hw *hw) ++static struct rk_hdptx_phy *to_rk_hdptx_phy(struct clk_hw *hw) +{ -+ return container_of(hw, struct rockchip_hdptx_phy, hw); ++ return container_of(hw, struct rk_hdptx_phy, hw); +} + - static int hdptx_write(struct rockchip_hdptx_phy *hdptx, u32 reg, u8 val) - { - return regmap_write(hdptx->regmap, reg, val); -@@ -1123,6 +1135,7 @@ static int hdptx_ropll_cmn_config(struct rockchip_hdptx_phy *hdptx, unsigned lon + static const struct regmap_config rk_hdptx_phy_regmap_config = { + .reg_bits = 32, + .reg_stride = 4, +@@ -759,6 +773,8 @@ static int rk_hdptx_ropll_tmds_cmn_config(struct rk_hdptx_phy *hdptx, struct ropll_config rc = {0}; + int i; - dev_dbg(hdptx->dev, "%s bus_width:%x rate:%lu\n", __func__, bus_width, bit_rate); -+ hdptx->rate = bit_rate * 100; - - if (color_depth) - bit_rate = bit_rate * 10 / 8; -@@ -1895,10 +1908,12 @@ static int rockchip_hdptx_phy_power_on(struct phy *phy) - int bit_rate = bus_width & DATA_RATE_MASK; - int ret; - -- ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); -- if (ret) { -- dev_err(hdptx->dev, "failed to enable clocks\n"); -- return ret; -+ if (!hdptx->count) { -+ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); -+ if (ret) { -+ dev_err(hdptx->dev, "failed 
to enable clocks\n"); -+ return ret; -+ } - } - - dev_dbg(hdptx->dev, "%s bus_width:%x rate:%d\n", -@@ -1923,6 +1938,9 @@ static int rockchip_hdptx_phy_power_off(struct phy *phy) - { - struct rockchip_hdptx_phy *hdptx = phy_get_drvdata(phy); - -+ if (hdptx->count) -+ return 0; ++ hdptx->rate = rate * 100; + - if (!(hdptx_grf_read(hdptx, GRF_HDPTX_STATUS) & HDPTX_O_PLL_LOCK_DONE)) - return 0; - -@@ -1946,6 +1964,125 @@ static void rockchip_hdptx_phy_runtime_disable(void *data) - pm_runtime_disable(hdptx->dev); + for (i = 0; i < ARRAY_SIZE(ropll_tmds_cfg); i++) + if (rate == ropll_tmds_cfg[i].bit_rate) { + cfg = &ropll_tmds_cfg[i]; +@@ -925,6 +941,133 @@ static int rk_hdptx_phy_runtime_resume(struct device *dev) + return ret; } +static int hdptx_phy_clk_enable(struct clk_hw *hw) +{ -+ struct rockchip_hdptx_phy *hdptx = to_rockchip_hdptx_phy(hw); ++ struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw); + int ret; + + if (hdptx->count) { @@ -8881,14 +22729,14 @@ index eece301c646f..2ff08c8adb66 100644 + return 0; + } + -+ ret = clk_bulk_prepare_enable(hdptx->nr_clks, hdptx->clks); ++ ret = pm_runtime_resume_and_get(hdptx->dev); + if (ret) { -+ dev_err(hdptx->dev, "failed to enable clocks\n"); ++ dev_err(hdptx->dev, "Failed to resume phy: %d\n", ret); + return ret; + } + + if (hdptx->rate) { -+ ret = hdptx_ropll_cmn_config(hdptx, hdptx->rate / 100); ++ ret = rk_hdptx_ropll_tmds_cmn_config(hdptx, hdptx->rate / 100); + if (ret < 0) { + dev_err(hdptx->dev, "Failed to init HDMI PHY PLL\n"); + return ret; @@ -8902,24 +22750,28 @@ index eece301c646f..2ff08c8adb66 100644 + +static void hdptx_phy_clk_disable(struct clk_hw *hw) +{ -+ struct rockchip_hdptx_phy *hdptx = to_rockchip_hdptx_phy(hw); ++ struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw); ++ int val, ret; + + if (hdptx->count > 1) { + hdptx->count--; + return; + } + -+ if (hdptx_grf_read(hdptx, GRF_HDPTX_STATUS) & HDPTX_O_PLL_LOCK_DONE) -+ hdptx_phy_disable(hdptx); ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret) ++ return; ++ if (val & HDPTX_O_PLL_LOCK_DONE) ++ rk_hdptx_phy_disable(hdptx); + -+ clk_bulk_disable_unprepare(hdptx->nr_clks, hdptx->clks); ++ pm_runtime_put(hdptx->dev); + hdptx->count--; +} + +static unsigned long hdptx_phy_clk_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ -+ struct rockchip_hdptx_phy *hdptx = to_rockchip_hdptx_phy(hw); ++ struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw); + + return hdptx->rate; +} @@ -8937,7 +22789,7 @@ index eece301c646f..2ff08c8adb66 100644 + if (bit_rate == cfg->bit_rate) + break; + -+ if (cfg->bit_rate == ~0 && !hdptx_phy_clk_pll_calc(bit_rate, NULL)) ++ if (cfg->bit_rate == ~0 && !rk_hdptx_phy_clk_pll_calc(bit_rate, NULL)) + return -EINVAL; + + return rate; @@ -8946,12 +22798,16 @@ index eece301c646f..2ff08c8adb66 100644 +static int hdptx_phy_clk_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ -+ struct rockchip_hdptx_phy *hdptx = to_rockchip_hdptx_phy(hw); ++ struct rk_hdptx_phy *hdptx = to_rk_hdptx_phy(hw); ++ int val, ret; + -+ if (hdptx_grf_read(hdptx, GRF_HDPTX_STATUS) & HDPTX_O_PLL_LOCK_DONE) -+ hdptx_phy_disable(hdptx); ++ ret = regmap_read(hdptx->grf, GRF_HDPTX_STATUS, &val); ++ if (ret) ++ return ret; ++ if (val & HDPTX_O_PLL_LOCK_DONE) ++ rk_hdptx_phy_disable(hdptx); + -+ return hdptx_ropll_cmn_config(hdptx, rate / 100); ++ return rk_hdptx_ropll_tmds_cmn_config(hdptx, rate / 100); +} + +static const struct clk_ops hdptx_phy_clk_ops = { @@ -8962,7 +22818,7 @@ index eece301c646f..2ff08c8adb66 100644 + .set_rate = 
hdptx_phy_clk_set_rate, +}; + -+static int rockchip_hdptx_phy_clk_register(struct rockchip_hdptx_phy *hdptx) ++static int rk_hdptx_phy_clk_register(struct rk_hdptx_phy *hdptx) +{ + struct device *dev = hdptx->dev; + const char *name, *pname; @@ -8990,13 +22846,17 @@ index eece301c646f..2ff08c8adb66 100644 + return 0; +} + - static int rockchip_hdptx_phy_probe(struct platform_device *pdev) + static int rk_hdptx_phy_probe(struct platform_device *pdev) { - struct device *dev = &pdev->dev; -@@ -1961,6 +2098,10 @@ static int rockchip_hdptx_phy_probe(struct platform_device *pdev) + struct phy_provider *phy_provider; +@@ -939,6 +1082,14 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev) hdptx->dev = dev; ++ // TODO: FIXME: It's not acceptable to abuse the alias ID in this way. ++ // The proper solution to get the ID is by looking up the device address ++ // from the DT "reg" property and map it. Examples for this are available ++ // in various other Rockchip drivers, e.g. the RK3588 USBDP PHY. + hdptx->id = of_alias_get_id(dev->of_node, "hdptxphy"); + if (hdptx->id < 0) + hdptx->id = 0; @@ -9004,25 +22864,25 @@ index eece301c646f..2ff08c8adb66 100644 regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(regs)) return dev_err_probe(dev, PTR_ERR(regs), -@@ -2020,6 +2161,10 @@ static int rockchip_hdptx_phy_probe(struct platform_device *pdev) +@@ -998,6 +1149,10 @@ static int rk_hdptx_phy_probe(struct platform_device *pdev) reset_control_deassert(hdptx->rsts[RST_CMN].rstc); reset_control_deassert(hdptx->rsts[RST_INIT].rstc); -+ ret = rockchip_hdptx_phy_clk_register(hdptx); ++ ret = rk_hdptx_phy_clk_register(hdptx); + if (ret) + return ret; + - platform_set_drvdata(pdev, hdptx); - return 0; + } + -- 2.42.0 -From ddbca7787878ca637ac3b220d08392807180bc1a Mon Sep 17 00:00:00 2001 +From c30d732d374138becbffb7c12bb3bfc81dbc88c0 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 1 Nov 2023 18:50:38 +0200 -Subject: [PATCH 42/81] [WIP] drm/bridge: synopsys: Add initial support for DW +Subject: [PATCH 55/71] [WIP] drm/bridge: synopsys: Add initial support for DW HDMI QP TX Controller Co-developed-by: Algea Cao @@ -15634,65 +29494,10 @@ index 6a46baa0737c..ac4e418c1c4e 100644 2.42.0 -From 3859034498523a5a410f4c0f71b78792ab236034 Mon Sep 17 00:00:00 2001 -From: Cristian Ciocaltea -Date: Mon, 15 Jan 2024 22:38:45 +0200 -Subject: [PATCH 43/81] arm64: dts: rockchip: Add HDMI0 PHY to rk3588 - -Add DT nodes for HDMI0 PHY and related syscon found on RK3588 SoC. 
- -Signed-off-by: Cristian Ciocaltea ---- - arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 21 +++++++++++++++++++++ - 1 file changed, 21 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 2f3db2e466db..3bcb11e74e8d 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -1101,6 +1101,11 @@ u2phy3_host: host-port { - }; - }; - -+ hdptxphy0_grf: syscon@fd5e0000 { -+ compatible = "rockchip,rk3588-hdptxphy-grf", "syscon"; -+ reg = <0x0 0xfd5e0000 0x0 0x100>; -+ }; -+ - ioc: syscon@fd5f0000 { - compatible = "rockchip,rk3588-ioc", "syscon"; - reg = <0x0 0xfd5f0000 0x0 0x10000>; -@@ -2874,6 +2879,22 @@ dmac2: dma-controller@fed10000 { - #dma-cells = <1>; - }; - -+ hdptxphy_hdmi0: phy@fed60000 { -+ compatible = "rockchip,rk3588-hdptx-phy"; -+ reg = <0x0 0xfed60000 0x0 0x2000>; -+ clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>; -+ clock-names = "ref", "apb"; -+ #phy-cells = <0>; -+ resets = <&cru SRST_HDPTX0>, <&cru SRST_P_HDPTX0>, -+ <&cru SRST_HDPTX0_INIT>, <&cru SRST_HDPTX0_CMN>, -+ <&cru SRST_HDPTX0_LANE>, <&cru SRST_HDPTX0_ROPLL>, -+ <&cru SRST_HDPTX0_LCPLL>; -+ reset-names = "phy", "apb", "init", "cmn", "lane", "ropll", -+ "lcpll"; -+ rockchip,grf = <&hdptxphy0_grf>; -+ status = "disabled"; -+ }; -+ - usbdp_phy0: phy@fed80000 { - compatible = "rockchip,rk3588-usbdp-phy"; - reg = <0x0 0xfed80000 0x0 0x10000>; --- -2.42.0 - - -From 52a69633315ec5dde43ec58633c3e9bd8b084468 Mon Sep 17 00:00:00 2001 +From ef8d64c80b83bed3381925ba4e92ea0b57f408c5 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Mon, 15 Jan 2024 22:47:41 +0200 -Subject: [PATCH 44/81] arm64: dts: rockchip: Add HDMI0 bridge to rk3588 +Subject: [PATCH 56/71] arm64: dts: rockchip: Add HDMI0 bridge to rk3588 Add DT node for the HDMI0 bridge found on RK3588 SoC. @@ -15702,10 +29507,10 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 55 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 3bcb11e74e8d..33762fd61483 100644 +index e167949f8b9a..73a226dffb2d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -1649,6 +1649,61 @@ i2s9_8ch: i2s@fddfc000 { +@@ -1591,6 +1591,61 @@ i2s9_8ch: i2s@fddfc000 { status = "disabled"; }; @@ -15771,10 +29576,10 @@ index 3bcb11e74e8d..33762fd61483 100644 2.42.0 -From 9b92592035c21bb5016dc5ad5e8d745fb0daef57 Mon Sep 17 00:00:00 2001 +From 34a982a88264036246234df2ec7df1692722da53 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Mon, 15 Jan 2024 22:51:17 +0200 -Subject: [PATCH 45/81] arm64: dts: rockchip: Enable HDMI0 on rock-5b +Subject: [PATCH 57/71] arm64: dts: rockchip: Enable HDMI0 on rock-5b Add the necessary DT changes to enable HDMI0 on Rock 5B. 
@@ -15784,7 +29589,7 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 30 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index 6c1f1e867cd0..86f45719457c 100644 +index 41d2a0870d9f..a0fa27545ee9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -4,6 +4,7 @@ @@ -15795,8 +29600,8 @@ index 6c1f1e867cd0..86f45719457c 100644 #include #include "rk3588.dtsi" -@@ -192,6 +193,20 @@ &cpu_l3 { - mem-supply = <&vdd_cpu_lit_mem_s0>; +@@ -184,6 +185,20 @@ &cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; }; +&hdmi0 { @@ -15816,7 +29621,7 @@ index 6c1f1e867cd0..86f45719457c 100644 &i2c0 { pinctrl-names = "default"; pinctrl-0 = <&i2c0m2_xfer>; -@@ -914,3 +929,18 @@ &usb_host1_xhci { +@@ -906,3 +921,18 @@ &usb_host1_xhci { &usb_host2_xhci { status = "okay"; }; @@ -15839,10 +29644,10 @@ index 6c1f1e867cd0..86f45719457c 100644 2.42.0 -From 39790a6aeb6bcfc8859c71e27c8ed3c9886c798b Mon Sep 17 00:00:00 2001 +From 9bbe75e7fc9edd25767cb5ce18ff99a787e785f8 Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 17 Jan 2024 01:53:38 +0200 -Subject: [PATCH 46/81] arm64: dts: rockchip: Enable HDMI0 on rk3588-evb1 +Subject: [PATCH 58/71] arm64: dts: rockchip: Enable HDMI0 on rk3588-evb1 Add the necessary DT changes to enable HDMI0 on Rockchip RK3588 EVB1. @@ -15852,7 +29657,7 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 30 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 5bd9999fcaf9..895a82081389 100644 +index 579ce6b6b5ff..f11916c4a328 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts @@ -9,6 +9,7 @@ @@ -15863,7 +29668,7 @@ index 5bd9999fcaf9..895a82081389 100644 #include #include "rk3588.dtsi" -@@ -344,6 +345,20 @@ &gmac0_rgmii_clk +@@ -336,6 +337,20 @@ &gmac0_rgmii_clk status = "okay"; }; @@ -15884,7 +29689,7 @@ index 5bd9999fcaf9..895a82081389 100644 &i2c2 { status = "okay"; -@@ -1314,3 +1329,18 @@ &usb_host1_xhci { +@@ -1318,3 +1333,18 @@ &usb_host1_xhci { dr_mode = "host"; status = "okay"; }; @@ -15907,10 +29712,10 @@ index 5bd9999fcaf9..895a82081389 100644 2.42.0 -From e983d7d133800d10b1edf9be219d235a66b9b272 Mon Sep 17 00:00:00 2001 +From 3456976f013dae68e4f705f2660e9e68196d3a8b Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Tue, 16 Jan 2024 03:13:38 +0200 -Subject: [PATCH 47/81] arm64: dts: rockchip: Enable HDMI0 PHY clk provider on +Subject: [PATCH 59/71] arm64: dts: rockchip: Enable HDMI0 PHY clk provider on rk3588 The HDMI0 PHY can be used as a clock provider on RK3588, hence add the @@ -15920,10 +29725,10 @@ missing #clock-cells property. 
1 file changed, 1 insertion(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 33762fd61483..110bf1cec8b6 100644 +index 73a226dffb2d..bd3e2b03385c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -2939,6 +2939,7 @@ hdptxphy_hdmi0: phy@fed60000 { +@@ -2881,6 +2881,7 @@ hdptxphy_hdmi0: phy@fed60000 { reg = <0x0 0xfed60000 0x0 0x2000>; clocks = <&cru CLK_USB2PHY_HDPTXRXPHY_REF>, <&cru PCLK_HDPTX0>; clock-names = "ref", "apb"; @@ -15935,10 +29740,10 @@ index 33762fd61483..110bf1cec8b6 100644 2.42.0 -From 62a3c9af195d5b507eb14510040514c48c15539f Mon Sep 17 00:00:00 2001 +From d7657e8a884e1a750b28f474c0b3436dc9f77b0b Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Fri, 3 Nov 2023 20:05:05 +0200 -Subject: [PATCH 48/81] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on +Subject: [PATCH 60/71] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on rock-5b The initial vop2 support for rk3588 in mainline is not able to handle @@ -15959,11 +29764,11 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index 86f45719457c..c1fce00c9c18 100644 +index a0fa27545ee9..d1e78da13709 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -@@ -193,6 +193,11 @@ &cpu_l3 { - mem-supply = <&vdd_cpu_lit_mem_s0>; +@@ -185,6 +185,11 @@ &cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; }; +&display_subsystem { @@ -15978,10 +29783,10 @@ index 86f45719457c..c1fce00c9c18 100644 2.42.0 -From 49a03b386b08c45d129896eb41f020e03c22367d Mon Sep 17 00:00:00 2001 +From 80558e7ddda93d51f74bfb9b2625207f9d62a10d Mon Sep 17 00:00:00 2001 From: Cristian Ciocaltea Date: Wed, 17 Jan 2024 02:00:41 +0200 -Subject: [PATCH 49/81] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on +Subject: [PATCH 61/71] arm64: dts: rockchip: Make use of HDMI0 PHY PLL on rk3588-evb1 The initial vop2 support for rk3588 in mainline is not able to handle @@ -16002,11 +29807,11 @@ Signed-off-by: Cristian Ciocaltea 1 file changed, 5 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 895a82081389..4fac5f52f99d 100644 +index f11916c4a328..d4be4d01874d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -330,6 +330,11 @@ &cpu_l3 { - mem-supply = <&vdd_cpu_lit_mem_s0>; +@@ -322,6 +322,11 @@ &cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; }; +&display_subsystem { @@ -16021,14821 +29826,10 @@ index 895a82081389..4fac5f52f99d 100644 2.42.0 -From f0364dc96b456b5c1b26c14e4f35d8e798ad1d7e Mon Sep 17 00:00:00 2001 -From: Cristian Ciocaltea -Date: Fri, 10 Nov 2023 00:40:54 +0200 -Subject: [PATCH 50/81] arm64: defconfig: Enable Rockchip HDMI/DP Combo PHY - -Enable support for the Rockchip HDMI/DP Combo PHY, which is based on a -Samsung IP block. This is used by the RK3588 SoC family. 
- -Signed-off-by: Cristian Ciocaltea ---- - arch/arm64/configs/defconfig | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig -index 07890b86777e..ab24a68ebada 100644 ---- a/arch/arm64/configs/defconfig -+++ b/arch/arm64/configs/defconfig -@@ -1490,6 +1490,7 @@ CONFIG_PHY_ROCKCHIP_INNO_USB2=y - CONFIG_PHY_ROCKCHIP_INNO_DSIDPHY=m - CONFIG_PHY_ROCKCHIP_NANENG_COMBO_PHY=m - CONFIG_PHY_ROCKCHIP_PCIE=m -+CONFIG_PHY_ROCKCHIP_SAMSUNG_HDPTX=m - CONFIG_PHY_ROCKCHIP_SNPS_PCIE3=y - CONFIG_PHY_ROCKCHIP_TYPEC=y - CONFIG_PHY_ROCKCHIP_USBDP=m --- -2.42.0 - - -From b156cd8b78663c5e00326daa8b634e08e932036e Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:32 +0100 -Subject: [PATCH 51/81] drm/panthor: Add uAPI - -Panthor follows the lead of other recently submitted drivers with -ioctls allowing us to support modern Vulkan features, like sparse memory -binding: - -- Pretty standard GEM management ioctls (BO_CREATE and BO_MMAP_OFFSET), - with the 'exclusive-VM' bit to speed-up BO reservation on job submission -- VM management ioctls (VM_CREATE, VM_DESTROY and VM_BIND). The VM_BIND - ioctl is loosely based on the Xe model, and can handle both - asynchronous and synchronous requests -- GPU execution context creation/destruction, tiler heap context creation - and job submission. Those ioctls reflect how the hardware/scheduler - works and are thus driver specific. - -We also have a way to expose IO regions, such that the usermode driver -can directly access specific/well-isolate registers, like the -LATEST_FLUSH register used to implement cache-flush reduction. - -This uAPI intentionally keeps usermode queues out of the scope, which -explains why doorbell registers and command stream ring-buffers are not -directly exposed to userspace. - -v4: -- Add a VM_GET_STATE ioctl -- Fix doc -- Expose the CORE_FEATURES register so we can deal with variants in the - UMD -- Add Steve's R-b - -v3: -- Add the concept of sync-only VM operation -- Fix support for 32-bit userspace -- Rework drm_panthor_vm_create to pass the user VA size instead of - the kernel VA size (suggested by Robin Murphy) -- Typo fixes -- Explicitly cast enums with top bit set to avoid compiler warnings in - -pedantic mode. -- Drop property core_group_count as it can be easily calculated by the - number of bits set in l2_present. - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-2-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - Documentation/gpu/driver-uapi.rst | 5 + - include/uapi/drm/panthor_drm.h | 945 ++++++++++++++++++++++++++++++ - 2 files changed, 950 insertions(+) - create mode 100644 include/uapi/drm/panthor_drm.h - -diff --git a/Documentation/gpu/driver-uapi.rst b/Documentation/gpu/driver-uapi.rst -index e5070a0e95ab..9fac7875e862 100644 ---- a/Documentation/gpu/driver-uapi.rst -+++ b/Documentation/gpu/driver-uapi.rst -@@ -22,3 +22,8 @@ drm/xe uAPI - =========== - - .. kernel-doc:: include/uapi/drm/xe_drm.h -+ -+drm/panthor uAPI -+================ -+ -+.. kernel-doc:: include/uapi/drm/panthor_drm.h -diff --git a/include/uapi/drm/panthor_drm.h b/include/uapi/drm/panthor_drm.h -new file mode 100644 -index 000000000000..47376cfaa949 ---- /dev/null -+++ b/include/uapi/drm/panthor_drm.h -@@ -0,0 +1,945 @@ -+/* SPDX-License-Identifier: MIT */ -+/* Copyright (C) 2023 Collabora ltd. 
*/ -+#ifndef _PANTHOR_DRM_H_ -+#define _PANTHOR_DRM_H_ -+ -+#include "drm.h" -+ -+#if defined(__cplusplus) -+extern "C" { -+#endif -+ -+/** -+ * DOC: Introduction -+ * -+ * This documentation describes the Panthor IOCTLs. -+ * -+ * Just a few generic rules about the data passed to the Panthor IOCTLs: -+ * -+ * - Structures must be aligned on 64-bit/8-byte. If the object is not -+ * naturally aligned, a padding field must be added. -+ * - Fields must be explicitly aligned to their natural type alignment with -+ * pad[0..N] fields. -+ * - All padding fields will be checked by the driver to make sure they are -+ * zeroed. -+ * - Flags can be added, but not removed/replaced. -+ * - New fields can be added to the main structures (the structures -+ * directly passed to the ioctl). Those fields can be added at the end of -+ * the structure, or replace existing padding fields. Any new field being -+ * added must preserve the behavior that existed before those fields were -+ * added when a value of zero is passed. -+ * - New fields can be added to indirect objects (objects pointed by the -+ * main structure), iff those objects are passed a size to reflect the -+ * size known by the userspace driver (see drm_panthor_obj_array::stride -+ * or drm_panthor_dev_query::size). -+ * - If the kernel driver is too old to know some fields, those will be -+ * ignored if zero, and otherwise rejected (and so will be zero on output). -+ * - If userspace is too old to know some fields, those will be zeroed -+ * (input) before the structure is parsed by the kernel driver. -+ * - Each new flag/field addition must come with a driver version update so -+ * the userspace driver doesn't have to trial and error to know which -+ * flags are supported. -+ * - Structures should not contain unions, as this would defeat the -+ * extensibility of such structures. -+ * - IOCTLs can't be removed or replaced. New IOCTL IDs should be placed -+ * at the end of the drm_panthor_ioctl_id enum. -+ */ -+ -+/** -+ * DOC: MMIO regions exposed to userspace. -+ * -+ * .. c:macro:: DRM_PANTHOR_USER_MMIO_OFFSET -+ * -+ * File offset for all MMIO regions being exposed to userspace. Don't use -+ * this value directly, use DRM_PANTHOR_USER__OFFSET values instead. -+ * pgoffset passed to mmap2() is an unsigned long, which forces us to use a -+ * different offset on 32-bit and 64-bit systems. -+ * -+ * .. c:macro:: DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET -+ * -+ * File offset for the LATEST_FLUSH_ID register. The Userspace driver controls -+ * GPU cache flushing through CS instructions, but the flush reduction -+ * mechanism requires a flush_id. This flush_id could be queried with an -+ * ioctl, but Arm provides a well-isolated register page containing only this -+ * read-only register, so let's expose this page through a static mmap offset -+ * and allow direct mapping of this MMIO region so we can avoid the -+ * user <-> kernel round-trip. -+ */ -+#define DRM_PANTHOR_USER_MMIO_OFFSET_32BIT (1ull << 43) -+#define DRM_PANTHOR_USER_MMIO_OFFSET_64BIT (1ull << 56) -+#define DRM_PANTHOR_USER_MMIO_OFFSET (sizeof(unsigned long) < 8 ? \ -+ DRM_PANTHOR_USER_MMIO_OFFSET_32BIT : \ -+ DRM_PANTHOR_USER_MMIO_OFFSET_64BIT) -+#define DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET (DRM_PANTHOR_USER_MMIO_OFFSET | 0) -+ -+/** -+ * DOC: IOCTL IDs -+ * -+ * enum drm_panthor_ioctl_id - IOCTL IDs -+ * -+ * Place new ioctls at the end, don't re-order, don't replace or remove entries. -+ * -+ * These IDs are not meant to be used directly. 
Use the DRM_IOCTL_PANTHOR_xxx -+ * definitions instead. -+ */ -+enum drm_panthor_ioctl_id { -+ /** @DRM_PANTHOR_DEV_QUERY: Query device information. */ -+ DRM_PANTHOR_DEV_QUERY = 0, -+ -+ /** @DRM_PANTHOR_VM_CREATE: Create a VM. */ -+ DRM_PANTHOR_VM_CREATE, -+ -+ /** @DRM_PANTHOR_VM_DESTROY: Destroy a VM. */ -+ DRM_PANTHOR_VM_DESTROY, -+ -+ /** @DRM_PANTHOR_VM_BIND: Bind/unbind memory to a VM. */ -+ DRM_PANTHOR_VM_BIND, -+ -+ /** @DRM_PANTHOR_VM_GET_STATE: Get VM state. */ -+ DRM_PANTHOR_VM_GET_STATE, -+ -+ /** @DRM_PANTHOR_BO_CREATE: Create a buffer object. */ -+ DRM_PANTHOR_BO_CREATE, -+ -+ /** -+ * @DRM_PANTHOR_BO_MMAP_OFFSET: Get the file offset to pass to -+ * mmap to map a GEM object. -+ */ -+ DRM_PANTHOR_BO_MMAP_OFFSET, -+ -+ /** @DRM_PANTHOR_GROUP_CREATE: Create a scheduling group. */ -+ DRM_PANTHOR_GROUP_CREATE, -+ -+ /** @DRM_PANTHOR_GROUP_DESTROY: Destroy a scheduling group. */ -+ DRM_PANTHOR_GROUP_DESTROY, -+ -+ /** -+ * @DRM_PANTHOR_GROUP_SUBMIT: Submit jobs to queues belonging -+ * to a specific scheduling group. -+ */ -+ DRM_PANTHOR_GROUP_SUBMIT, -+ -+ /** @DRM_PANTHOR_GROUP_GET_STATE: Get the state of a scheduling group. */ -+ DRM_PANTHOR_GROUP_GET_STATE, -+ -+ /** @DRM_PANTHOR_TILER_HEAP_CREATE: Create a tiler heap. */ -+ DRM_PANTHOR_TILER_HEAP_CREATE, -+ -+ /** @DRM_PANTHOR_TILER_HEAP_DESTROY: Destroy a tiler heap. */ -+ DRM_PANTHOR_TILER_HEAP_DESTROY, -+}; -+ -+/** -+ * DRM_IOCTL_PANTHOR() - Build a Panthor IOCTL number -+ * @__access: Access type. Must be R, W or RW. -+ * @__id: One of the DRM_PANTHOR_xxx id. -+ * @__type: Suffix of the type being passed to the IOCTL. -+ * -+ * Don't use this macro directly, use the DRM_IOCTL_PANTHOR_xxx -+ * values instead. -+ * -+ * Return: An IOCTL number to be passed to ioctl() from userspace. -+ */ -+#define DRM_IOCTL_PANTHOR(__access, __id, __type) \ -+ DRM_IO ## __access(DRM_COMMAND_BASE + DRM_PANTHOR_ ## __id, \ -+ struct drm_panthor_ ## __type) -+ -+#define DRM_IOCTL_PANTHOR_DEV_QUERY \ -+ DRM_IOCTL_PANTHOR(WR, DEV_QUERY, dev_query) -+#define DRM_IOCTL_PANTHOR_VM_CREATE \ -+ DRM_IOCTL_PANTHOR(WR, VM_CREATE, vm_create) -+#define DRM_IOCTL_PANTHOR_VM_DESTROY \ -+ DRM_IOCTL_PANTHOR(WR, VM_DESTROY, vm_destroy) -+#define DRM_IOCTL_PANTHOR_VM_BIND \ -+ DRM_IOCTL_PANTHOR(WR, VM_BIND, vm_bind) -+#define DRM_IOCTL_PANTHOR_VM_GET_STATE \ -+ DRM_IOCTL_PANTHOR(WR, VM_GET_STATE, vm_get_state) -+#define DRM_IOCTL_PANTHOR_BO_CREATE \ -+ DRM_IOCTL_PANTHOR(WR, BO_CREATE, bo_create) -+#define DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET \ -+ DRM_IOCTL_PANTHOR(WR, BO_MMAP_OFFSET, bo_mmap_offset) -+#define DRM_IOCTL_PANTHOR_GROUP_CREATE \ -+ DRM_IOCTL_PANTHOR(WR, GROUP_CREATE, group_create) -+#define DRM_IOCTL_PANTHOR_GROUP_DESTROY \ -+ DRM_IOCTL_PANTHOR(WR, GROUP_DESTROY, group_destroy) -+#define DRM_IOCTL_PANTHOR_GROUP_SUBMIT \ -+ DRM_IOCTL_PANTHOR(WR, GROUP_SUBMIT, group_submit) -+#define DRM_IOCTL_PANTHOR_GROUP_GET_STATE \ -+ DRM_IOCTL_PANTHOR(WR, GROUP_GET_STATE, group_get_state) -+#define DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE \ -+ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_CREATE, tiler_heap_create) -+#define DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY \ -+ DRM_IOCTL_PANTHOR(WR, TILER_HEAP_DESTROY, tiler_heap_destroy) -+ -+/** -+ * DOC: IOCTL arguments -+ */ -+ -+/** -+ * struct drm_panthor_obj_array - Object array. -+ * -+ * This object is used to pass an array of objects whose size is subject to changes in -+ * future versions of the driver. In order to support this mutability, we pass a stride -+ * describing the size of the object as known by userspace. 
-+ * -+ * You shouldn't fill drm_panthor_obj_array fields directly. You should instead use -+ * the DRM_PANTHOR_OBJ_ARRAY() macro that takes care of initializing the stride to -+ * the object size. -+ */ -+struct drm_panthor_obj_array { -+ /** @stride: Stride of object struct. Used for versioning. */ -+ __u32 stride; -+ -+ /** @count: Number of objects in the array. */ -+ __u32 count; -+ -+ /** @array: User pointer to an array of objects. */ -+ __u64 array; -+}; -+ -+/** -+ * DRM_PANTHOR_OBJ_ARRAY() - Initialize a drm_panthor_obj_array field. -+ * @cnt: Number of elements in the array. -+ * @ptr: Pointer to the array to pass to the kernel. -+ * -+ * Macro initializing a drm_panthor_obj_array based on the object size as known -+ * by userspace. -+ */ -+#define DRM_PANTHOR_OBJ_ARRAY(cnt, ptr) \ -+ { .stride = sizeof((ptr)[0]), .count = (cnt), .array = (__u64)(uintptr_t)(ptr) } -+ -+/** -+ * enum drm_panthor_sync_op_flags - Synchronization operation flags. -+ */ -+enum drm_panthor_sync_op_flags { -+ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK: Synchronization handle type mask. */ -+ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK = 0xff, -+ -+ /** @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ: Synchronization object type. */ -+ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ = 0, -+ -+ /** -+ * @DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ: Timeline synchronization -+ * object type. -+ */ -+ DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ = 1, -+ -+ /** @DRM_PANTHOR_SYNC_OP_WAIT: Wait operation. */ -+ DRM_PANTHOR_SYNC_OP_WAIT = 0 << 31, -+ -+ /** @DRM_PANTHOR_SYNC_OP_SIGNAL: Signal operation. */ -+ DRM_PANTHOR_SYNC_OP_SIGNAL = (int)(1u << 31), -+}; -+ -+/** -+ * struct drm_panthor_sync_op - Synchronization operation. -+ */ -+struct drm_panthor_sync_op { -+ /** @flags: Synchronization operation flags. Combination of DRM_PANTHOR_SYNC_OP values. */ -+ __u32 flags; -+ -+ /** @handle: Sync handle. */ -+ __u32 handle; -+ -+ /** -+ * @timeline_value: MBZ if -+ * (flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK) != -+ * DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ. -+ */ -+ __u64 timeline_value; -+}; -+ -+/** -+ * enum drm_panthor_dev_query_type - Query type -+ * -+ * Place new types at the end, don't re-order, don't remove or replace. -+ */ -+enum drm_panthor_dev_query_type { -+ /** @DRM_PANTHOR_DEV_QUERY_GPU_INFO: Query GPU information. */ -+ DRM_PANTHOR_DEV_QUERY_GPU_INFO = 0, -+ -+ /** @DRM_PANTHOR_DEV_QUERY_CSIF_INFO: Query command-stream interface information. */ -+ DRM_PANTHOR_DEV_QUERY_CSIF_INFO, -+}; -+ -+/** -+ * struct drm_panthor_gpu_info - GPU information -+ * -+ * Structure grouping all queryable information relating to the GPU. -+ */ -+struct drm_panthor_gpu_info { -+ /** @gpu_id : GPU ID. */ -+ __u32 gpu_id; -+#define DRM_PANTHOR_ARCH_MAJOR(x) ((x) >> 28) -+#define DRM_PANTHOR_ARCH_MINOR(x) (((x) >> 24) & 0xf) -+#define DRM_PANTHOR_ARCH_REV(x) (((x) >> 20) & 0xf) -+#define DRM_PANTHOR_PRODUCT_MAJOR(x) (((x) >> 16) & 0xf) -+#define DRM_PANTHOR_VERSION_MAJOR(x) (((x) >> 12) & 0xf) -+#define DRM_PANTHOR_VERSION_MINOR(x) (((x) >> 4) & 0xff) -+#define DRM_PANTHOR_VERSION_STATUS(x) ((x) & 0xf) -+ -+ /** @gpu_rev: GPU revision. */ -+ __u32 gpu_rev; -+ -+ /** @csf_id: Command stream frontend ID. 
*/ -+ __u32 csf_id; -+#define DRM_PANTHOR_CSHW_MAJOR(x) (((x) >> 26) & 0x3f) -+#define DRM_PANTHOR_CSHW_MINOR(x) (((x) >> 20) & 0x3f) -+#define DRM_PANTHOR_CSHW_REV(x) (((x) >> 16) & 0xf) -+#define DRM_PANTHOR_MCU_MAJOR(x) (((x) >> 10) & 0x3f) -+#define DRM_PANTHOR_MCU_MINOR(x) (((x) >> 4) & 0x3f) -+#define DRM_PANTHOR_MCU_REV(x) ((x) & 0xf) -+ -+ /** @l2_features: L2-cache features. */ -+ __u32 l2_features; -+ -+ /** @tiler_features: Tiler features. */ -+ __u32 tiler_features; -+ -+ /** @mem_features: Memory features. */ -+ __u32 mem_features; -+ -+ /** @mmu_features: MMU features. */ -+ __u32 mmu_features; -+#define DRM_PANTHOR_MMU_VA_BITS(x) ((x) & 0xff) -+ -+ /** @thread_features: Thread features. */ -+ __u32 thread_features; -+ -+ /** @max_threads: Maximum number of threads. */ -+ __u32 max_threads; -+ -+ /** @thread_max_workgroup_size: Maximum workgroup size. */ -+ __u32 thread_max_workgroup_size; -+ -+ /** -+ * @thread_max_barrier_size: Maximum number of threads that can wait -+ * simultaneously on a barrier. -+ */ -+ __u32 thread_max_barrier_size; -+ -+ /** @coherency_features: Coherency features. */ -+ __u32 coherency_features; -+ -+ /** @texture_features: Texture features. */ -+ __u32 texture_features[4]; -+ -+ /** @as_present: Bitmask encoding the number of address-space exposed by the MMU. */ -+ __u32 as_present; -+ -+ /** @shader_present: Bitmask encoding the shader cores exposed by the GPU. */ -+ __u64 shader_present; -+ -+ /** @l2_present: Bitmask encoding the L2 caches exposed by the GPU. */ -+ __u64 l2_present; -+ -+ /** @tiler_present: Bitmask encoding the tiler units exposed by the GPU. */ -+ __u64 tiler_present; -+ -+ /* @core_features: Used to discriminate core variants when they exist. */ -+ __u32 core_features; -+ -+ /* @pad: MBZ. */ -+ __u32 pad; -+}; -+ -+/** -+ * struct drm_panthor_csif_info - Command stream interface information -+ * -+ * Structure grouping all queryable information relating to the command stream interface. -+ */ -+struct drm_panthor_csif_info { -+ /** @csg_slot_count: Number of command stream group slots exposed by the firmware. */ -+ __u32 csg_slot_count; -+ -+ /** @cs_slot_count: Number of command stream slots per group. */ -+ __u32 cs_slot_count; -+ -+ /** @cs_reg_count: Number of command stream registers. */ -+ __u32 cs_reg_count; -+ -+ /** @scoreboard_slot_count: Number of scoreboard slots. */ -+ __u32 scoreboard_slot_count; -+ -+ /** -+ * @unpreserved_cs_reg_count: Number of command stream registers reserved by -+ * the kernel driver to call a userspace command stream. -+ * -+ * All registers can be used by a userspace command stream, but the -+ * [cs_slot_count - unpreserved_cs_reg_count .. cs_slot_count] registers are -+ * used by the kernel when DRM_PANTHOR_IOCTL_GROUP_SUBMIT is called. -+ */ -+ __u32 unpreserved_cs_reg_count; -+ -+ /** -+ * @pad: Padding field, set to zero. -+ */ -+ __u32 pad; -+}; -+ -+/** -+ * struct drm_panthor_dev_query - Arguments passed to DRM_PANTHOR_IOCTL_DEV_QUERY -+ */ -+struct drm_panthor_dev_query { -+ /** @type: the query type (see drm_panthor_dev_query_type). */ -+ __u32 type; -+ -+ /** -+ * @size: size of the type being queried. -+ * -+ * If pointer is NULL, size is updated by the driver to provide the -+ * output structure size. If pointer is not NULL, the driver will -+ * only copy min(size, actual_structure_size) bytes to the pointer, -+ * and update the size accordingly. This allows us to extend query -+ * types without breaking userspace. 
-+ */ -+ __u32 size; -+ -+ /** -+ * @pointer: user pointer to a query type struct. -+ * -+ * Pointer can be NULL, in which case, nothing is copied, but the -+ * actual structure size is returned. If not NULL, it must point to -+ * a location that's large enough to hold size bytes. -+ */ -+ __u64 pointer; -+}; -+ -+/** -+ * struct drm_panthor_vm_create - Arguments passed to DRM_PANTHOR_IOCTL_VM_CREATE -+ */ -+struct drm_panthor_vm_create { -+ /** @flags: VM flags, MBZ. */ -+ __u32 flags; -+ -+ /** @id: Returned VM ID. */ -+ __u32 id; -+ -+ /** -+ * @user_va_range: Size of the VA space reserved for user objects. -+ * -+ * The kernel will pick the remaining space to map kernel-only objects to the -+ * VM (heap chunks, heap context, ring buffers, kernel synchronization objects, -+ * ...). If the space left for kernel objects is too small, kernel object -+ * allocation will fail further down the road. One can use -+ * drm_panthor_gpu_info::mmu_features to extract the total virtual address -+ * range, and chose a user_va_range that leaves some space to the kernel. -+ * -+ * If user_va_range is zero, the kernel will pick a sensible value based on -+ * TASK_SIZE and the virtual range supported by the GPU MMU (the kernel/user -+ * split should leave enough VA space for userspace processes to support SVM, -+ * while still allowing the kernel to map some amount of kernel objects in -+ * the kernel VA range). The value chosen by the driver will be returned in -+ * @user_va_range. -+ * -+ * User VA space always starts at 0x0, kernel VA space is always placed after -+ * the user VA range. -+ */ -+ __u64 user_va_range; -+}; -+ -+/** -+ * struct drm_panthor_vm_destroy - Arguments passed to DRM_PANTHOR_IOCTL_VM_DESTROY -+ */ -+struct drm_panthor_vm_destroy { -+ /** @id: ID of the VM to destroy. */ -+ __u32 id; -+ -+ /** @pad: MBZ. */ -+ __u32 pad; -+}; -+ -+/** -+ * enum drm_panthor_vm_bind_op_flags - VM bind operation flags -+ */ -+enum drm_panthor_vm_bind_op_flags { -+ /** -+ * @DRM_PANTHOR_VM_BIND_OP_MAP_READONLY: Map the memory read-only. -+ * -+ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. -+ */ -+ DRM_PANTHOR_VM_BIND_OP_MAP_READONLY = 1 << 0, -+ -+ /** -+ * @DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC: Map the memory not-executable. -+ * -+ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. -+ */ -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC = 1 << 1, -+ -+ /** -+ * @DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED: Map the memory uncached. -+ * -+ * Only valid with DRM_PANTHOR_VM_BIND_OP_TYPE_MAP. -+ */ -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED = 1 << 2, -+ -+ /** -+ * @DRM_PANTHOR_VM_BIND_OP_TYPE_MASK: Mask used to determine the type of operation. -+ */ -+ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK = (int)(0xfu << 28), -+ -+ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: Map operation. */ -+ DRM_PANTHOR_VM_BIND_OP_TYPE_MAP = 0 << 28, -+ -+ /** @DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: Unmap operation. */ -+ DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP = 1 << 28, -+ -+ /** -+ * @DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: No VM operation. -+ * -+ * Just serves as a synchronization point on a VM queue. -+ * -+ * Only valid if %DRM_PANTHOR_VM_BIND_ASYNC is set in drm_panthor_vm_bind::flags, -+ * and drm_panthor_vm_bind_op::syncs contains at least one element. -+ */ -+ DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY = 2 << 28, -+}; -+ -+/** -+ * struct drm_panthor_vm_bind_op - VM bind operation -+ */ -+struct drm_panthor_vm_bind_op { -+ /** @flags: Combination of drm_panthor_vm_bind_op_flags flags. */ -+ __u32 flags; -+ -+ /** -+ * @bo_handle: Handle of the buffer object to map. 
-+ * MBZ for unmap or sync-only operations. -+ */ -+ __u32 bo_handle; -+ -+ /** -+ * @bo_offset: Buffer object offset. -+ * MBZ for unmap or sync-only operations. -+ */ -+ __u64 bo_offset; -+ -+ /** -+ * @va: Virtual address to map/unmap. -+ * MBZ for sync-only operations. -+ */ -+ __u64 va; -+ -+ /** -+ * @size: Size to map/unmap. -+ * MBZ for sync-only operations. -+ */ -+ __u64 size; -+ -+ /** -+ * @syncs: Array of struct drm_panthor_sync_op synchronization -+ * operations. -+ * -+ * This array must be empty if %DRM_PANTHOR_VM_BIND_ASYNC is not set on -+ * the drm_panthor_vm_bind object containing this VM bind operation. -+ * -+ * This array shall not be empty for sync-only operations. -+ */ -+ struct drm_panthor_obj_array syncs; -+ -+}; -+ -+/** -+ * enum drm_panthor_vm_bind_flags - VM bind flags -+ */ -+enum drm_panthor_vm_bind_flags { -+ /** -+ * @DRM_PANTHOR_VM_BIND_ASYNC: VM bind operations are queued to the VM -+ * queue instead of being executed synchronously. -+ */ -+ DRM_PANTHOR_VM_BIND_ASYNC = 1 << 0, -+}; -+ -+/** -+ * struct drm_panthor_vm_bind - Arguments passed to DRM_IOCTL_PANTHOR_VM_BIND -+ */ -+struct drm_panthor_vm_bind { -+ /** @vm_id: VM targeted by the bind request. */ -+ __u32 vm_id; -+ -+ /** @flags: Combination of drm_panthor_vm_bind_flags flags. */ -+ __u32 flags; -+ -+ /** @ops: Array of struct drm_panthor_vm_bind_op bind operations. */ -+ struct drm_panthor_obj_array ops; -+}; -+ -+/** -+ * enum drm_panthor_vm_state - VM states. -+ */ -+enum drm_panthor_vm_state { -+ /** -+ * @DRM_PANTHOR_VM_STATE_USABLE: VM is usable. -+ * -+ * New VM operations will be accepted on this VM. -+ */ -+ DRM_PANTHOR_VM_STATE_USABLE, -+ -+ /** -+ * @DRM_PANTHOR_VM_STATE_UNSABLE: VM is unsable. -+ * -+ * Something put the VM in an unusable state (like an asynchronous -+ * VM_BIND request failing for any reason). -+ * -+ * Once the VM is in this state, all new MAP operations will be -+ * rejected, and any GPU job targeting this VM will fail. -+ * UNMAP operations are still accepted. -+ * -+ * The only way to recover from an unusable VM is to create a new -+ * VM, and destroy the old one. -+ */ -+ DRM_PANTHOR_VM_STATE_UNUSABLE, -+}; -+ -+/** -+ * struct drm_panthor_vm_get_state - Get VM state. -+ */ -+struct drm_panthor_vm_get_state { -+ /** @vm_id: VM targeted by the get_state request. */ -+ __u32 vm_id; -+ -+ /** -+ * @state: state returned by the driver. -+ * -+ * Must be one of the enum drm_panthor_vm_state values. -+ */ -+ __u32 state; -+}; -+ -+/** -+ * enum drm_panthor_bo_flags - Buffer object flags, passed at creation time. -+ */ -+enum drm_panthor_bo_flags { -+ /** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */ -+ DRM_PANTHOR_BO_NO_MMAP = (1 << 0), -+}; -+ -+/** -+ * struct drm_panthor_bo_create - Arguments passed to DRM_IOCTL_PANTHOR_BO_CREATE. -+ */ -+struct drm_panthor_bo_create { -+ /** -+ * @size: Requested size for the object -+ * -+ * The (page-aligned) allocated size for the object will be returned. -+ */ -+ __u64 size; -+ -+ /** -+ * @flags: Flags. Must be a combination of drm_panthor_bo_flags flags. -+ */ -+ __u32 flags; -+ -+ /** -+ * @exclusive_vm_id: Exclusive VM this buffer object will be mapped to. -+ * -+ * If not zero, the field must refer to a valid VM ID, and implies that: -+ * - the buffer object will only ever be bound to that VM -+ * - cannot be exported as a PRIME fd -+ */ -+ __u32 exclusive_vm_id; -+ -+ /** -+ * @handle: Returned handle for the object. -+ * -+ * Object handles are nonzero. 
-+ */ -+ __u32 handle; -+ -+ /** @pad: MBZ. */ -+ __u32 pad; -+}; -+ -+/** -+ * struct drm_panthor_bo_mmap_offset - Arguments passed to DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET. -+ */ -+struct drm_panthor_bo_mmap_offset { -+ /** @handle: Handle of the object we want an mmap offset for. */ -+ __u32 handle; -+ -+ /** @pad: MBZ. */ -+ __u32 pad; -+ -+ /** @offset: The fake offset to use for subsequent mmap calls. */ -+ __u64 offset; -+}; -+ -+/** -+ * struct drm_panthor_queue_create - Queue creation arguments. -+ */ -+struct drm_panthor_queue_create { -+ /** -+ * @priority: Defines the priority of queues inside a group. Goes from 0 to 15, -+ * 15 being the highest priority. -+ */ -+ __u8 priority; -+ -+ /** @pad: Padding fields, MBZ. */ -+ __u8 pad[3]; -+ -+ /** @ringbuf_size: Size of the ring buffer to allocate to this queue. */ -+ __u32 ringbuf_size; -+}; -+ -+/** -+ * enum drm_panthor_group_priority - Scheduling group priority -+ */ -+enum drm_panthor_group_priority { -+ /** @PANTHOR_GROUP_PRIORITY_LOW: Low priority group. */ -+ PANTHOR_GROUP_PRIORITY_LOW = 0, -+ -+ /** @PANTHOR_GROUP_PRIORITY_MEDIUM: Medium priority group. */ -+ PANTHOR_GROUP_PRIORITY_MEDIUM, -+ -+ /** @PANTHOR_GROUP_PRIORITY_HIGH: High priority group. */ -+ PANTHOR_GROUP_PRIORITY_HIGH, -+}; -+ -+/** -+ * struct drm_panthor_group_create - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_CREATE -+ */ -+struct drm_panthor_group_create { -+ /** @queues: Array of drm_panthor_queue_create elements. */ -+ struct drm_panthor_obj_array queues; -+ -+ /** -+ * @max_compute_cores: Maximum number of cores that can be used by compute -+ * jobs across CS queues bound to this group. -+ * -+ * Must be less or equal to the number of bits set in @compute_core_mask. -+ */ -+ __u8 max_compute_cores; -+ -+ /** -+ * @max_fragment_cores: Maximum number of cores that can be used by fragment -+ * jobs across CS queues bound to this group. -+ * -+ * Must be less or equal to the number of bits set in @fragment_core_mask. -+ */ -+ __u8 max_fragment_cores; -+ -+ /** -+ * @max_tiler_cores: Maximum number of tilers that can be used by tiler jobs -+ * across CS queues bound to this group. -+ * -+ * Must be less or equal to the number of bits set in @tiler_core_mask. -+ */ -+ __u8 max_tiler_cores; -+ -+ /** @priority: Group priority (see enum drm_panthor_group_priority). */ -+ __u8 priority; -+ -+ /** @pad: Padding field, MBZ. */ -+ __u32 pad; -+ -+ /** -+ * @compute_core_mask: Mask encoding cores that can be used for compute jobs. -+ * -+ * This field must have at least @max_compute_cores bits set. -+ * -+ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. -+ */ -+ __u64 compute_core_mask; -+ -+ /** -+ * @fragment_core_mask: Mask encoding cores that can be used for fragment jobs. -+ * -+ * This field must have at least @max_fragment_cores bits set. -+ * -+ * The bits set here should also be set in drm_panthor_gpu_info::shader_present. -+ */ -+ __u64 fragment_core_mask; -+ -+ /** -+ * @tiler_core_mask: Mask encoding cores that can be used for tiler jobs. -+ * -+ * This field must have at least @max_tiler_cores bits set. -+ * -+ * The bits set here should also be set in drm_panthor_gpu_info::tiler_present. -+ */ -+ __u64 tiler_core_mask; -+ -+ /** -+ * @vm_id: VM ID to bind this group to. -+ * -+ * All submission to queues bound to this group will use this VM. -+ */ -+ __u32 vm_id; -+ -+ /** -+ * @group_handle: Returned group handle. Passed back when submitting jobs or -+ * destroying a group. 
-+ */ -+ __u32 group_handle; -+}; -+ -+/** -+ * struct drm_panthor_group_destroy - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_DESTROY -+ */ -+struct drm_panthor_group_destroy { -+ /** @group_handle: Group to destroy */ -+ __u32 group_handle; -+ -+ /** @pad: Padding field, MBZ. */ -+ __u32 pad; -+}; -+ -+/** -+ * struct drm_panthor_queue_submit - Job submission arguments. -+ * -+ * This is describing the userspace command stream to call from the kernel -+ * command stream ring-buffer. Queue submission is always part of a group -+ * submission, taking one or more jobs to submit to the underlying queues. -+ */ -+struct drm_panthor_queue_submit { -+ /** @queue_index: Index of the queue inside a group. */ -+ __u32 queue_index; -+ -+ /** -+ * @stream_size: Size of the command stream to execute. -+ * -+ * Must be 64-bit/8-byte aligned (the size of a CS instruction) -+ * -+ * Can be zero if stream_addr is zero too. -+ */ -+ __u32 stream_size; -+ -+ /** -+ * @stream_addr: GPU address of the command stream to execute. -+ * -+ * Must be aligned on 64-byte. -+ * -+ * Can be zero is stream_size is zero too. -+ */ -+ __u64 stream_addr; -+ -+ /** -+ * @latest_flush: FLUSH_ID read at the time the stream was built. -+ * -+ * This allows cache flush elimination for the automatic -+ * flush+invalidate(all) done at submission time, which is needed to -+ * ensure the GPU doesn't get garbage when reading the indirect command -+ * stream buffers. If you want the cache flush to happen -+ * unconditionally, pass a zero here. -+ */ -+ __u32 latest_flush; -+ -+ /** @pad: MBZ. */ -+ __u32 pad; -+ -+ /** @syncs: Array of struct drm_panthor_sync_op sync operations. */ -+ struct drm_panthor_obj_array syncs; -+}; -+ -+/** -+ * struct drm_panthor_group_submit - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_SUBMIT -+ */ -+struct drm_panthor_group_submit { -+ /** @group_handle: Handle of the group to queue jobs to. */ -+ __u32 group_handle; -+ -+ /** @pad: MBZ. */ -+ __u32 pad; -+ -+ /** @queue_submits: Array of drm_panthor_queue_submit objects. */ -+ struct drm_panthor_obj_array queue_submits; -+}; -+ -+/** -+ * enum drm_panthor_group_state_flags - Group state flags -+ */ -+enum drm_panthor_group_state_flags { -+ /** -+ * @DRM_PANTHOR_GROUP_STATE_TIMEDOUT: Group had unfinished jobs. -+ * -+ * When a group ends up with this flag set, no jobs can be submitted to its queues. -+ */ -+ DRM_PANTHOR_GROUP_STATE_TIMEDOUT = 1 << 0, -+ -+ /** -+ * @DRM_PANTHOR_GROUP_STATE_FATAL_FAULT: Group had fatal faults. -+ * -+ * When a group ends up with this flag set, no jobs can be submitted to its queues. -+ */ -+ DRM_PANTHOR_GROUP_STATE_FATAL_FAULT = 1 << 1, -+}; -+ -+/** -+ * struct drm_panthor_group_get_state - Arguments passed to DRM_IOCTL_PANTHOR_GROUP_GET_STATE -+ * -+ * Used to query the state of a group and decide whether a new group should be created to -+ * replace it. -+ */ -+struct drm_panthor_group_get_state { -+ /** @group_handle: Handle of the group to query state on */ -+ __u32 group_handle; -+ -+ /** -+ * @state: Combination of DRM_PANTHOR_GROUP_STATE_* flags encoding the -+ * group state. -+ */ -+ __u32 state; -+ -+ /** @fatal_queues: Bitmask of queues that faced fatal faults. 
*/ -+ __u32 fatal_queues; -+ -+ /** @pad: MBZ */ -+ __u32 pad; -+}; -+ -+/** -+ * struct drm_panthor_tiler_heap_create - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE -+ */ -+struct drm_panthor_tiler_heap_create { -+ /** @vm_id: VM ID the tiler heap should be mapped to */ -+ __u32 vm_id; -+ -+ /** @initial_chunk_count: Initial number of chunks to allocate. */ -+ __u32 initial_chunk_count; -+ -+ /** @chunk_size: Chunk size. Must be a power of two at least 256KB large. */ -+ __u32 chunk_size; -+ -+ /** @max_chunks: Maximum number of chunks that can be allocated. */ -+ __u32 max_chunks; -+ -+ /** -+ * @target_in_flight: Maximum number of in-flight render passes. -+ * -+ * If the heap has more than tiler jobs in-flight, the FW will wait for render -+ * passes to finish before queuing new tiler jobs. -+ */ -+ __u32 target_in_flight; -+ -+ /** @handle: Returned heap handle. Passed back to DESTROY_TILER_HEAP. */ -+ __u32 handle; -+ -+ /** @tiler_heap_ctx_gpu_va: Returned heap GPU virtual address returned */ -+ __u64 tiler_heap_ctx_gpu_va; -+ -+ /** -+ * @first_heap_chunk_gpu_va: First heap chunk. -+ * -+ * The tiler heap is formed of heap chunks forming a single-link list. This -+ * is the first element in the list. -+ */ -+ __u64 first_heap_chunk_gpu_va; -+}; -+ -+/** -+ * struct drm_panthor_tiler_heap_destroy - Arguments passed to DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY -+ */ -+struct drm_panthor_tiler_heap_destroy { -+ /** @handle: Handle of the tiler heap to destroy */ -+ __u32 handle; -+ -+ /** @pad: Padding field, MBZ. */ -+ __u32 pad; -+}; -+ -+#if defined(__cplusplus) -+} -+#endif -+ -+#endif /* _PANTHOR_DRM_H_ */ --- -2.42.0 - - -From 5f0204ec57b97982d008e5477e1418c1a1ab1cb3 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:33 +0100 -Subject: [PATCH 52/81] drm/panthor: Add GPU register definitions - -Those are the registers directly accessible through the MMIO range. - -FW registers are exposed in panthor_fw.h. - -v4: -- Add the CORE_FEATURES register (needed for GPU variants) -- Add Steve's R-b - -v3: -- Add macros to extract GPU ID info -- Formatting changes -- Remove AS_TRANSCFG_ADRMODE_LEGACY - it doesn't exist post-CSF -- Remove CSF_GPU_LATEST_FLUSH_ID_DEFAULT -- Add GPU_L2_FEATURES_LINE_SIZE for extracting the GPU cache line size - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-3-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_regs.h | 239 +++++++++++++++++++++++++ - 1 file changed, 239 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_regs.h - -diff --git a/drivers/gpu/drm/panthor/panthor_regs.h b/drivers/gpu/drm/panthor/panthor_regs.h -new file mode 100644 -index 000000000000..b7b3b3add166 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_regs.h -@@ -0,0 +1,239 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. */ -+/* -+ * Register definitions based on mali_kbase_gpu_regmap.h and -+ * mali_kbase_gpu_regmap_csf.h -+ * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved. 
-+ */ -+#ifndef __PANTHOR_REGS_H__ -+#define __PANTHOR_REGS_H__ -+ -+#define GPU_ID 0x0 -+#define GPU_ARCH_MAJOR(x) ((x) >> 28) -+#define GPU_ARCH_MINOR(x) (((x) & GENMASK(27, 24)) >> 24) -+#define GPU_ARCH_REV(x) (((x) & GENMASK(23, 20)) >> 20) -+#define GPU_PROD_MAJOR(x) (((x) & GENMASK(19, 16)) >> 16) -+#define GPU_VER_MAJOR(x) (((x) & GENMASK(15, 12)) >> 12) -+#define GPU_VER_MINOR(x) (((x) & GENMASK(11, 4)) >> 4) -+#define GPU_VER_STATUS(x) ((x) & GENMASK(3, 0)) -+ -+#define GPU_L2_FEATURES 0x4 -+#define GPU_L2_FEATURES_LINE_SIZE(x) (1 << ((x) & GENMASK(7, 0))) -+ -+#define GPU_CORE_FEATURES 0x8 -+ -+#define GPU_TILER_FEATURES 0xC -+#define GPU_MEM_FEATURES 0x10 -+#define GROUPS_L2_COHERENT BIT(0) -+ -+#define GPU_MMU_FEATURES 0x14 -+#define GPU_MMU_FEATURES_VA_BITS(x) ((x) & GENMASK(7, 0)) -+#define GPU_MMU_FEATURES_PA_BITS(x) (((x) >> 8) & GENMASK(7, 0)) -+#define GPU_AS_PRESENT 0x18 -+#define GPU_CSF_ID 0x1C -+ -+#define GPU_INT_RAWSTAT 0x20 -+#define GPU_INT_CLEAR 0x24 -+#define GPU_INT_MASK 0x28 -+#define GPU_INT_STAT 0x2c -+#define GPU_IRQ_FAULT BIT(0) -+#define GPU_IRQ_PROTM_FAULT BIT(1) -+#define GPU_IRQ_RESET_COMPLETED BIT(8) -+#define GPU_IRQ_POWER_CHANGED BIT(9) -+#define GPU_IRQ_POWER_CHANGED_ALL BIT(10) -+#define GPU_IRQ_CLEAN_CACHES_COMPLETED BIT(17) -+#define GPU_IRQ_DOORBELL_MIRROR BIT(18) -+#define GPU_IRQ_MCU_STATUS_CHANGED BIT(19) -+#define GPU_CMD 0x30 -+#define GPU_CMD_DEF(type, payload) ((type) | ((payload) << 8)) -+#define GPU_SOFT_RESET GPU_CMD_DEF(1, 1) -+#define GPU_HARD_RESET GPU_CMD_DEF(1, 2) -+#define CACHE_CLEAN BIT(0) -+#define CACHE_INV BIT(1) -+#define GPU_FLUSH_CACHES(l2, lsc, oth) \ -+ GPU_CMD_DEF(4, ((l2) << 0) | ((lsc) << 4) | ((oth) << 8)) -+ -+#define GPU_STATUS 0x34 -+#define GPU_STATUS_ACTIVE BIT(0) -+#define GPU_STATUS_PWR_ACTIVE BIT(1) -+#define GPU_STATUS_PAGE_FAULT BIT(4) -+#define GPU_STATUS_PROTM_ACTIVE BIT(7) -+#define GPU_STATUS_DBG_ENABLED BIT(8) -+ -+#define GPU_FAULT_STATUS 0x3C -+#define GPU_FAULT_ADDR_LO 0x40 -+#define GPU_FAULT_ADDR_HI 0x44 -+ -+#define GPU_PWR_KEY 0x50 -+#define GPU_PWR_KEY_UNLOCK 0x2968A819 -+#define GPU_PWR_OVERRIDE0 0x54 -+#define GPU_PWR_OVERRIDE1 0x58 -+ -+#define GPU_TIMESTAMP_OFFSET_LO 0x88 -+#define GPU_TIMESTAMP_OFFSET_HI 0x8C -+#define GPU_CYCLE_COUNT_LO 0x90 -+#define GPU_CYCLE_COUNT_HI 0x94 -+#define GPU_TIMESTAMP_LO 0x98 -+#define GPU_TIMESTAMP_HI 0x9C -+ -+#define GPU_THREAD_MAX_THREADS 0xA0 -+#define GPU_THREAD_MAX_WORKGROUP_SIZE 0xA4 -+#define GPU_THREAD_MAX_BARRIER_SIZE 0xA8 -+#define GPU_THREAD_FEATURES 0xAC -+ -+#define GPU_TEXTURE_FEATURES(n) (0xB0 + ((n) * 4)) -+ -+#define GPU_SHADER_PRESENT_LO 0x100 -+#define GPU_SHADER_PRESENT_HI 0x104 -+#define GPU_TILER_PRESENT_LO 0x110 -+#define GPU_TILER_PRESENT_HI 0x114 -+#define GPU_L2_PRESENT_LO 0x120 -+#define GPU_L2_PRESENT_HI 0x124 -+ -+#define SHADER_READY_LO 0x140 -+#define SHADER_READY_HI 0x144 -+#define TILER_READY_LO 0x150 -+#define TILER_READY_HI 0x154 -+#define L2_READY_LO 0x160 -+#define L2_READY_HI 0x164 -+ -+#define SHADER_PWRON_LO 0x180 -+#define SHADER_PWRON_HI 0x184 -+#define TILER_PWRON_LO 0x190 -+#define TILER_PWRON_HI 0x194 -+#define L2_PWRON_LO 0x1A0 -+#define L2_PWRON_HI 0x1A4 -+ -+#define SHADER_PWROFF_LO 0x1C0 -+#define SHADER_PWROFF_HI 0x1C4 -+#define TILER_PWROFF_LO 0x1D0 -+#define TILER_PWROFF_HI 0x1D4 -+#define L2_PWROFF_LO 0x1E0 -+#define L2_PWROFF_HI 0x1E4 -+ -+#define SHADER_PWRTRANS_LO 0x200 -+#define SHADER_PWRTRANS_HI 0x204 -+#define TILER_PWRTRANS_LO 0x210 -+#define TILER_PWRTRANS_HI 0x214 -+#define L2_PWRTRANS_LO 
0x220 -+#define L2_PWRTRANS_HI 0x224 -+ -+#define SHADER_PWRACTIVE_LO 0x240 -+#define SHADER_PWRACTIVE_HI 0x244 -+#define TILER_PWRACTIVE_LO 0x250 -+#define TILER_PWRACTIVE_HI 0x254 -+#define L2_PWRACTIVE_LO 0x260 -+#define L2_PWRACTIVE_HI 0x264 -+ -+#define GPU_REVID 0x280 -+ -+#define GPU_COHERENCY_FEATURES 0x300 -+#define GPU_COHERENCY_PROT_BIT(name) BIT(GPU_COHERENCY_ ## name) -+ -+#define GPU_COHERENCY_PROTOCOL 0x304 -+#define GPU_COHERENCY_ACE 0 -+#define GPU_COHERENCY_ACE_LITE 1 -+#define GPU_COHERENCY_NONE 31 -+ -+#define MCU_CONTROL 0x700 -+#define MCU_CONTROL_ENABLE 1 -+#define MCU_CONTROL_AUTO 2 -+#define MCU_CONTROL_DISABLE 0 -+ -+#define MCU_STATUS 0x704 -+#define MCU_STATUS_DISABLED 0 -+#define MCU_STATUS_ENABLED 1 -+#define MCU_STATUS_HALT 2 -+#define MCU_STATUS_FATAL 3 -+ -+/* Job Control regs */ -+#define JOB_INT_RAWSTAT 0x1000 -+#define JOB_INT_CLEAR 0x1004 -+#define JOB_INT_MASK 0x1008 -+#define JOB_INT_STAT 0x100c -+#define JOB_INT_GLOBAL_IF BIT(31) -+#define JOB_INT_CSG_IF(x) BIT(x) -+ -+/* MMU regs */ -+#define MMU_INT_RAWSTAT 0x2000 -+#define MMU_INT_CLEAR 0x2004 -+#define MMU_INT_MASK 0x2008 -+#define MMU_INT_STAT 0x200c -+ -+/* AS_COMMAND register commands */ -+ -+#define MMU_BASE 0x2400 -+#define MMU_AS_SHIFT 6 -+#define MMU_AS(as) (MMU_BASE + ((as) << MMU_AS_SHIFT)) -+ -+#define AS_TRANSTAB_LO(as) (MMU_AS(as) + 0x0) -+#define AS_TRANSTAB_HI(as) (MMU_AS(as) + 0x4) -+#define AS_MEMATTR_LO(as) (MMU_AS(as) + 0x8) -+#define AS_MEMATTR_HI(as) (MMU_AS(as) + 0xC) -+#define AS_MEMATTR_AARCH64_INNER_ALLOC_IMPL (2 << 2) -+#define AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(w, r) ((3 << 2) | \ -+ ((w) ? BIT(0) : 0) | \ -+ ((r) ? BIT(1) : 0)) -+#define AS_MEMATTR_AARCH64_SH_MIDGARD_INNER (0 << 4) -+#define AS_MEMATTR_AARCH64_SH_CPU_INNER (1 << 4) -+#define AS_MEMATTR_AARCH64_SH_CPU_INNER_SHADER_COH (2 << 4) -+#define AS_MEMATTR_AARCH64_SHARED (0 << 6) -+#define AS_MEMATTR_AARCH64_INNER_OUTER_NC (1 << 6) -+#define AS_MEMATTR_AARCH64_INNER_OUTER_WB (2 << 6) -+#define AS_MEMATTR_AARCH64_FAULT (3 << 6) -+#define AS_LOCKADDR_LO(as) (MMU_AS(as) + 0x10) -+#define AS_LOCKADDR_HI(as) (MMU_AS(as) + 0x14) -+#define AS_COMMAND(as) (MMU_AS(as) + 0x18) -+#define AS_COMMAND_NOP 0 -+#define AS_COMMAND_UPDATE 1 -+#define AS_COMMAND_LOCK 2 -+#define AS_COMMAND_UNLOCK 3 -+#define AS_COMMAND_FLUSH_PT 4 -+#define AS_COMMAND_FLUSH_MEM 5 -+#define AS_LOCK_REGION_MIN_SIZE (1ULL << 15) -+#define AS_FAULTSTATUS(as) (MMU_AS(as) + 0x1C) -+#define AS_FAULTSTATUS_ACCESS_TYPE_MASK (0x3 << 8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC (0x0 << 8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_EX (0x1 << 8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_READ (0x2 << 8) -+#define AS_FAULTSTATUS_ACCESS_TYPE_WRITE (0x3 << 8) -+#define AS_FAULTADDRESS_LO(as) (MMU_AS(as) + 0x20) -+#define AS_FAULTADDRESS_HI(as) (MMU_AS(as) + 0x24) -+#define AS_STATUS(as) (MMU_AS(as) + 0x28) -+#define AS_STATUS_AS_ACTIVE BIT(0) -+#define AS_TRANSCFG_LO(as) (MMU_AS(as) + 0x30) -+#define AS_TRANSCFG_HI(as) (MMU_AS(as) + 0x34) -+#define AS_TRANSCFG_ADRMODE_UNMAPPED (1 << 0) -+#define AS_TRANSCFG_ADRMODE_IDENTITY (2 << 0) -+#define AS_TRANSCFG_ADRMODE_AARCH64_4K (6 << 0) -+#define AS_TRANSCFG_ADRMODE_AARCH64_64K (8 << 0) -+#define AS_TRANSCFG_INA_BITS(x) ((x) << 6) -+#define AS_TRANSCFG_OUTA_BITS(x) ((x) << 14) -+#define AS_TRANSCFG_SL_CONCAT BIT(22) -+#define AS_TRANSCFG_PTW_MEMATTR_NC (1 << 24) -+#define AS_TRANSCFG_PTW_MEMATTR_WB (2 << 24) -+#define AS_TRANSCFG_PTW_SH_NS (0 << 28) -+#define AS_TRANSCFG_PTW_SH_OS (2 << 28) -+#define AS_TRANSCFG_PTW_SH_IS (3 << 
28) -+#define AS_TRANSCFG_PTW_RA BIT(30) -+#define AS_TRANSCFG_DISABLE_HIER_AP BIT(33) -+#define AS_TRANSCFG_DISABLE_AF_FAULT BIT(34) -+#define AS_TRANSCFG_WXN BIT(35) -+#define AS_TRANSCFG_XREADABLE BIT(36) -+#define AS_FAULTEXTRA_LO(as) (MMU_AS(as) + 0x38) -+#define AS_FAULTEXTRA_HI(as) (MMU_AS(as) + 0x3C) -+ -+#define CSF_GPU_LATEST_FLUSH_ID 0x10000 -+ -+#define CSF_DOORBELL(i) (0x80000 + ((i) * 0x10000)) -+#define CSF_GLB_DOORBELL_ID 0 -+ -+#define gpu_write(dev, reg, data) \ -+ writel(data, (dev)->iomem + (reg)) -+ -+#define gpu_read(dev, reg) \ -+ readl((dev)->iomem + (reg)) -+ -+#endif --- -2.42.0 - - -From 457d2130c5231bcbafd8cb9528f8230c1b5355e0 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:34 +0100 -Subject: [PATCH 53/81] drm/panthor: Add the device logical block - -The panthor driver is designed in a modular way, where each logical -block is dealing with a specific HW-block or software feature. In order -for those blocks to communicate with each other, we need a central -panthor_device collecting all the blocks, and exposing some common -features, like interrupt handling, power management, reset, ... - -This what this panthor_device logical block is about. - -v4: -- Check drmm_mutex_init() return code -- Fix panthor_device_reset_work() out path -- Fix the race in the unplug logic -- Fix typos -- Unplug blocks when something fails in panthor_device_init() -- Add Steve's R-b - -v3: -- Add acks for the MIT+GPL2 relicensing -- Fix 32-bit support -- Shorten the sections protected by panthor_device::pm::mmio_lock to fix - lock ordering issues. -- Rename panthor_device::pm::lock into panthor_device::pm::mmio_lock to - better reflect what this lock is protecting -- Use dev_err_probe() -- Make sure we call drm_dev_exit() when something fails half-way in - panthor_device_reset_work() -- Replace CSF_GPU_LATEST_FLUSH_ID_DEFAULT with a constant '1' and a - comment to explain. Also remove setting the dummy flush ID on suspend. -- Remove drm_WARN_ON() in panthor_exception_name() -- Check pirq->suspended in panthor_xxx_irq_raw_handler() - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-4-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_device.c | 544 +++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_device.h | 393 ++++++++++++++++ - 2 files changed, 937 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_device.c - create mode 100644 drivers/gpu/drm/panthor/panthor_device.h - -diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c -new file mode 100644 -index 000000000000..dda41a1dd47e ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_device.c -@@ -0,0 +1,544 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. 
*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "panthor_sched.h" -+#include "panthor_device.h" -+#include "panthor_devfreq.h" -+#include "panthor_gpu.h" -+#include "panthor_fw.h" -+#include "panthor_mmu.h" -+#include "panthor_regs.h" -+ -+static int panthor_clk_init(struct panthor_device *ptdev) -+{ -+ ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); -+ if (IS_ERR(ptdev->clks.core)) -+ return dev_err_probe(ptdev->base.dev, -+ PTR_ERR(ptdev->clks.core), -+ "get 'core' clock failed"); -+ -+ ptdev->clks.stacks = devm_clk_get_optional(ptdev->base.dev, "stacks"); -+ if (IS_ERR(ptdev->clks.stacks)) -+ return dev_err_probe(ptdev->base.dev, -+ PTR_ERR(ptdev->clks.stacks), -+ "get 'stacks' clock failed"); -+ -+ ptdev->clks.coregroup = devm_clk_get_optional(ptdev->base.dev, "coregroup"); -+ if (IS_ERR(ptdev->clks.coregroup)) -+ return dev_err_probe(ptdev->base.dev, -+ PTR_ERR(ptdev->clks.coregroup), -+ "get 'coregroup' clock failed"); -+ -+ drm_info(&ptdev->base, "clock rate = %lu\n", clk_get_rate(ptdev->clks.core)); -+ return 0; -+} -+ -+void panthor_device_unplug(struct panthor_device *ptdev) -+{ -+ /* This function can be called from two different path: the reset work -+ * and the platform device remove callback. drm_dev_unplug() doesn't -+ * deal with concurrent callers, so we have to protect drm_dev_unplug() -+ * calls with our own lock, and bail out if the device is already -+ * unplugged. -+ */ -+ mutex_lock(&ptdev->unplug.lock); -+ if (drm_dev_is_unplugged(&ptdev->base)) { -+ /* Someone beat us, release the lock and wait for the unplug -+ * operation to be reported as done. -+ **/ -+ mutex_unlock(&ptdev->unplug.lock); -+ wait_for_completion(&ptdev->unplug.done); -+ return; -+ } -+ -+ /* Call drm_dev_unplug() so any access to HW blocks happening after -+ * that point get rejected. -+ */ -+ drm_dev_unplug(&ptdev->base); -+ -+ /* We do the rest of the unplug with the unplug lock released, -+ * future callers will wait on ptdev->unplug.done anyway. -+ */ -+ mutex_unlock(&ptdev->unplug.lock); -+ -+ drm_WARN_ON(&ptdev->base, pm_runtime_get_sync(ptdev->base.dev) < 0); -+ -+ /* Now, try to cleanly shutdown the GPU before the device resources -+ * get reclaimed. -+ */ -+ panthor_sched_unplug(ptdev); -+ panthor_fw_unplug(ptdev); -+ panthor_mmu_unplug(ptdev); -+ panthor_gpu_unplug(ptdev); -+ -+ pm_runtime_dont_use_autosuspend(ptdev->base.dev); -+ pm_runtime_put_sync_suspend(ptdev->base.dev); -+ -+ /* Report the unplug operation as done to unblock concurrent -+ * panthor_device_unplug() callers. 
-+ */ -+ complete_all(&ptdev->unplug.done); -+} -+ -+static void panthor_device_reset_cleanup(struct drm_device *ddev, void *data) -+{ -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ -+ cancel_work_sync(&ptdev->reset.work); -+ destroy_workqueue(ptdev->reset.wq); -+} -+ -+static void panthor_device_reset_work(struct work_struct *work) -+{ -+ struct panthor_device *ptdev = container_of(work, struct panthor_device, reset.work); -+ int ret = 0, cookie; -+ -+ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) { -+ /* -+ * No need for a reset as the device has been (or will be) -+ * powered down -+ */ -+ atomic_set(&ptdev->reset.pending, 0); -+ return; -+ } -+ -+ if (!drm_dev_enter(&ptdev->base, &cookie)) -+ return; -+ -+ panthor_sched_pre_reset(ptdev); -+ panthor_fw_pre_reset(ptdev, true); -+ panthor_mmu_pre_reset(ptdev); -+ panthor_gpu_soft_reset(ptdev); -+ panthor_gpu_l2_power_on(ptdev); -+ panthor_mmu_post_reset(ptdev); -+ ret = panthor_fw_post_reset(ptdev); -+ if (ret) -+ goto out_dev_exit; -+ -+ atomic_set(&ptdev->reset.pending, 0); -+ panthor_sched_post_reset(ptdev); -+ -+out_dev_exit: -+ drm_dev_exit(cookie); -+ -+ if (ret) { -+ panthor_device_unplug(ptdev); -+ drm_err(&ptdev->base, "Failed to boot MCU after reset, making device unusable."); -+ } -+} -+ -+static bool panthor_device_is_initialized(struct panthor_device *ptdev) -+{ -+ return !!ptdev->scheduler; -+} -+ -+static void panthor_device_free_page(struct drm_device *ddev, void *data) -+{ -+ free_page((unsigned long)data); -+} -+ -+int panthor_device_init(struct panthor_device *ptdev) -+{ -+ struct resource *res; -+ struct page *p; -+ int ret; -+ -+ ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; -+ -+ init_completion(&ptdev->unplug.done); -+ ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); -+ if (ret) -+ return ret; -+ -+ ret = drmm_mutex_init(&ptdev->base, &ptdev->pm.mmio_lock); -+ if (ret) -+ return ret; -+ -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); -+ p = alloc_page(GFP_KERNEL | __GFP_ZERO); -+ if (!p) -+ return -ENOMEM; -+ -+ ptdev->pm.dummy_latest_flush = page_address(p); -+ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_free_page, -+ ptdev->pm.dummy_latest_flush); -+ if (ret) -+ return ret; -+ -+ /* -+ * Set the dummy page holding the latest flush to 1. This will cause the -+ * flush to avoided as we know it isn't necessary if the submission -+ * happens while the dummy page is mapped. Zero cannot be used because -+ * that means 'always flush'. 
-+ */ -+ *ptdev->pm.dummy_latest_flush = 1; -+ -+ INIT_WORK(&ptdev->reset.work, panthor_device_reset_work); -+ ptdev->reset.wq = alloc_ordered_workqueue("panthor-reset-wq", 0); -+ if (!ptdev->reset.wq) -+ return -ENOMEM; -+ -+ ret = drmm_add_action_or_reset(&ptdev->base, panthor_device_reset_cleanup, NULL); -+ if (ret) -+ return ret; -+ -+ ret = panthor_clk_init(ptdev); -+ if (ret) -+ return ret; -+ -+ ret = panthor_devfreq_init(ptdev); -+ if (ret) -+ return ret; -+ -+ ptdev->iomem = devm_platform_get_and_ioremap_resource(to_platform_device(ptdev->base.dev), -+ 0, &res); -+ if (IS_ERR(ptdev->iomem)) -+ return PTR_ERR(ptdev->iomem); -+ -+ ptdev->phys_addr = res->start; -+ -+ ret = devm_pm_runtime_enable(ptdev->base.dev); -+ if (ret) -+ return ret; -+ -+ ret = pm_runtime_resume_and_get(ptdev->base.dev); -+ if (ret) -+ return ret; -+ -+ ret = panthor_gpu_init(ptdev); -+ if (ret) -+ goto err_rpm_put; -+ -+ ret = panthor_mmu_init(ptdev); -+ if (ret) -+ goto err_unplug_gpu; -+ -+ ret = panthor_fw_init(ptdev); -+ if (ret) -+ goto err_unplug_mmu; -+ -+ ret = panthor_sched_init(ptdev); -+ if (ret) -+ goto err_unplug_fw; -+ -+ /* ~3 frames */ -+ pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50); -+ -+ ret = drm_dev_register(&ptdev->base, 0); -+ if (ret) -+ goto err_unplug_sched; -+ -+ pm_runtime_use_autosuspend(ptdev->base.dev); -+ pm_runtime_put_autosuspend(ptdev->base.dev); -+ return 0; -+ -+err_unplug_sched: -+ panthor_sched_unplug(ptdev); -+ -+err_unplug_fw: -+ panthor_fw_unplug(ptdev); -+ -+err_unplug_mmu: -+ panthor_mmu_unplug(ptdev); -+ -+err_unplug_gpu: -+ panthor_gpu_unplug(ptdev); -+ -+err_rpm_put: -+ pm_runtime_put_sync_suspend(ptdev->base.dev); -+ return ret; -+} -+ -+#define PANTHOR_EXCEPTION(id) \ -+ [DRM_PANTHOR_EXCEPTION_ ## id] = { \ -+ .name = #id, \ -+ } -+ -+struct panthor_exception_info { -+ const char *name; -+}; -+ -+static const struct panthor_exception_info panthor_exception_infos[] = { -+ PANTHOR_EXCEPTION(OK), -+ PANTHOR_EXCEPTION(TERMINATED), -+ PANTHOR_EXCEPTION(KABOOM), -+ PANTHOR_EXCEPTION(EUREKA), -+ PANTHOR_EXCEPTION(ACTIVE), -+ PANTHOR_EXCEPTION(CS_RES_TERM), -+ PANTHOR_EXCEPTION(CS_CONFIG_FAULT), -+ PANTHOR_EXCEPTION(CS_ENDPOINT_FAULT), -+ PANTHOR_EXCEPTION(CS_BUS_FAULT), -+ PANTHOR_EXCEPTION(CS_INSTR_INVALID), -+ PANTHOR_EXCEPTION(CS_CALL_STACK_OVERFLOW), -+ PANTHOR_EXCEPTION(CS_INHERIT_FAULT), -+ PANTHOR_EXCEPTION(INSTR_INVALID_PC), -+ PANTHOR_EXCEPTION(INSTR_INVALID_ENC), -+ PANTHOR_EXCEPTION(INSTR_BARRIER_FAULT), -+ PANTHOR_EXCEPTION(DATA_INVALID_FAULT), -+ PANTHOR_EXCEPTION(TILE_RANGE_FAULT), -+ PANTHOR_EXCEPTION(ADDR_RANGE_FAULT), -+ PANTHOR_EXCEPTION(IMPRECISE_FAULT), -+ PANTHOR_EXCEPTION(OOM), -+ PANTHOR_EXCEPTION(CSF_FW_INTERNAL_ERROR), -+ PANTHOR_EXCEPTION(CSF_RES_EVICTION_TIMEOUT), -+ PANTHOR_EXCEPTION(GPU_BUS_FAULT), -+ PANTHOR_EXCEPTION(GPU_SHAREABILITY_FAULT), -+ PANTHOR_EXCEPTION(SYS_SHAREABILITY_FAULT), -+ PANTHOR_EXCEPTION(GPU_CACHEABILITY_FAULT), -+ PANTHOR_EXCEPTION(TRANSLATION_FAULT_0), -+ PANTHOR_EXCEPTION(TRANSLATION_FAULT_1), -+ PANTHOR_EXCEPTION(TRANSLATION_FAULT_2), -+ PANTHOR_EXCEPTION(TRANSLATION_FAULT_3), -+ PANTHOR_EXCEPTION(TRANSLATION_FAULT_4), -+ PANTHOR_EXCEPTION(PERM_FAULT_0), -+ PANTHOR_EXCEPTION(PERM_FAULT_1), -+ PANTHOR_EXCEPTION(PERM_FAULT_2), -+ PANTHOR_EXCEPTION(PERM_FAULT_3), -+ PANTHOR_EXCEPTION(ACCESS_FLAG_1), -+ PANTHOR_EXCEPTION(ACCESS_FLAG_2), -+ PANTHOR_EXCEPTION(ACCESS_FLAG_3), -+ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_IN), -+ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT0), -+ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT1), -+ 
PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT2), -+ PANTHOR_EXCEPTION(ADDR_SIZE_FAULT_OUT3), -+ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_0), -+ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_1), -+ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_2), -+ PANTHOR_EXCEPTION(MEM_ATTR_FAULT_3), -+}; -+ -+const char *panthor_exception_name(struct panthor_device *ptdev, u32 exception_code) -+{ -+ if (exception_code >= ARRAY_SIZE(panthor_exception_infos) || -+ !panthor_exception_infos[exception_code].name) -+ return "Unknown exception type"; -+ -+ return panthor_exception_infos[exception_code].name; -+} -+ -+static vm_fault_t panthor_mmio_vm_fault(struct vm_fault *vmf) -+{ -+ struct vm_area_struct *vma = vmf->vma; -+ struct panthor_device *ptdev = vma->vm_private_data; -+ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; -+ unsigned long pfn; -+ pgprot_t pgprot; -+ vm_fault_t ret; -+ bool active; -+ int cookie; -+ -+ if (!drm_dev_enter(&ptdev->base, &cookie)) -+ return VM_FAULT_SIGBUS; -+ -+ mutex_lock(&ptdev->pm.mmio_lock); -+ active = atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE; -+ -+ switch (panthor_device_mmio_offset(id)) { -+ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: -+ if (active) -+ pfn = __phys_to_pfn(ptdev->phys_addr + CSF_GPU_LATEST_FLUSH_ID); -+ else -+ pfn = virt_to_pfn(ptdev->pm.dummy_latest_flush); -+ break; -+ -+ default: -+ ret = VM_FAULT_SIGBUS; -+ goto out_unlock; -+ } -+ -+ pgprot = vma->vm_page_prot; -+ if (active) -+ pgprot = pgprot_noncached(pgprot); -+ -+ ret = vmf_insert_pfn_prot(vma, vmf->address, pfn, pgprot); -+ -+out_unlock: -+ mutex_unlock(&ptdev->pm.mmio_lock); -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static const struct vm_operations_struct panthor_mmio_vm_ops = { -+ .fault = panthor_mmio_vm_fault, -+}; -+ -+int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct *vma) -+{ -+ u64 id = (u64)vma->vm_pgoff << PAGE_SHIFT; -+ -+ switch (panthor_device_mmio_offset(id)) { -+ case DRM_PANTHOR_USER_FLUSH_ID_MMIO_OFFSET: -+ if (vma->vm_end - vma->vm_start != PAGE_SIZE || -+ (vma->vm_flags & (VM_WRITE | VM_EXEC))) -+ return -EINVAL; -+ -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ /* Defer actual mapping to the fault handler. 
*/ -+ vma->vm_private_data = ptdev; -+ vma->vm_ops = &panthor_mmio_vm_ops; -+ vm_flags_set(vma, -+ VM_IO | VM_DONTCOPY | VM_DONTEXPAND | -+ VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP); -+ return 0; -+} -+ -+#ifdef CONFIG_PM -+int panthor_device_resume(struct device *dev) -+{ -+ struct panthor_device *ptdev = dev_get_drvdata(dev); -+ int ret, cookie; -+ -+ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_SUSPENDED) -+ return -EINVAL; -+ -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_RESUMING); -+ -+ ret = clk_prepare_enable(ptdev->clks.core); -+ if (ret) -+ goto err_set_suspended; -+ -+ ret = clk_prepare_enable(ptdev->clks.stacks); -+ if (ret) -+ goto err_disable_core_clk; -+ -+ ret = clk_prepare_enable(ptdev->clks.coregroup); -+ if (ret) -+ goto err_disable_stacks_clk; -+ -+ ret = panthor_devfreq_resume(ptdev); -+ if (ret) -+ goto err_disable_coregroup_clk; -+ -+ if (panthor_device_is_initialized(ptdev) && -+ drm_dev_enter(&ptdev->base, &cookie)) { -+ panthor_gpu_resume(ptdev); -+ panthor_mmu_resume(ptdev); -+ ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); -+ if (!ret) -+ panthor_sched_resume(ptdev); -+ -+ drm_dev_exit(cookie); -+ -+ if (ret) -+ goto err_devfreq_suspend; -+ } -+ -+ if (atomic_read(&ptdev->reset.pending)) -+ queue_work(ptdev->reset.wq, &ptdev->reset.work); -+ -+ /* Clear all IOMEM mappings pointing to this device after we've -+ * resumed. This way the fake mappings pointing to the dummy pages -+ * are removed and the real iomem mapping will be restored on next -+ * access. -+ */ -+ mutex_lock(&ptdev->pm.mmio_lock); -+ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, -+ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); -+ mutex_unlock(&ptdev->pm.mmio_lock); -+ return 0; -+ -+err_devfreq_suspend: -+ panthor_devfreq_suspend(ptdev); -+ -+err_disable_coregroup_clk: -+ clk_disable_unprepare(ptdev->clks.coregroup); -+ -+err_disable_stacks_clk: -+ clk_disable_unprepare(ptdev->clks.stacks); -+ -+err_disable_core_clk: -+ clk_disable_unprepare(ptdev->clks.core); -+ -+err_set_suspended: -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); -+ return ret; -+} -+ -+int panthor_device_suspend(struct device *dev) -+{ -+ struct panthor_device *ptdev = dev_get_drvdata(dev); -+ int ret, cookie; -+ -+ if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) -+ return -EINVAL; -+ -+ /* Clear all IOMEM mappings pointing to this device before we -+ * shutdown the power-domain and clocks. Failing to do that results -+ * in external aborts when the process accesses the iomem region. -+ * We change the state and call unmap_mapping_range() with the -+ * mmio_lock held to make sure the vm_fault handler won't set up -+ * invalid mappings. -+ */ -+ mutex_lock(&ptdev->pm.mmio_lock); -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDING); -+ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, -+ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); -+ mutex_unlock(&ptdev->pm.mmio_lock); -+ -+ if (panthor_device_is_initialized(ptdev) && -+ drm_dev_enter(&ptdev->base, &cookie)) { -+ cancel_work_sync(&ptdev->reset.work); -+ -+ /* We prepare everything as if we were resetting the GPU. -+ * The end of the reset will happen in the resume path though. 
-+ */ -+ panthor_sched_suspend(ptdev); -+ panthor_fw_suspend(ptdev); -+ panthor_mmu_suspend(ptdev); -+ panthor_gpu_suspend(ptdev); -+ drm_dev_exit(cookie); -+ } -+ -+ ret = panthor_devfreq_suspend(ptdev); -+ if (ret) { -+ if (panthor_device_is_initialized(ptdev) && -+ drm_dev_enter(&ptdev->base, &cookie)) { -+ panthor_gpu_resume(ptdev); -+ panthor_mmu_resume(ptdev); -+ drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); -+ panthor_sched_resume(ptdev); -+ drm_dev_exit(cookie); -+ } -+ -+ goto err_set_active; -+ } -+ -+ clk_disable_unprepare(ptdev->clks.coregroup); -+ clk_disable_unprepare(ptdev->clks.stacks); -+ clk_disable_unprepare(ptdev->clks.core); -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); -+ return 0; -+ -+err_set_active: -+ /* If something failed and we have to revert back to an -+ * active state, we also need to clear the MMIO userspace -+ * mappings, so any dumb pages that were mapped while we -+ * were trying to suspend gets invalidated. -+ */ -+ mutex_lock(&ptdev->pm.mmio_lock); -+ atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); -+ unmap_mapping_range(ptdev->base.anon_inode->i_mapping, -+ DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); -+ mutex_unlock(&ptdev->pm.mmio_lock); -+ return ret; -+} -+#endif -diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h -new file mode 100644 -index 000000000000..7e385541b448 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_device.h -@@ -0,0 +1,393 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. */ -+ -+#ifndef __PANTHOR_DEVICE_H__ -+#define __PANTHOR_DEVICE_H__ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+struct panthor_csf; -+struct panthor_csf_ctx; -+struct panthor_device; -+struct panthor_gpu; -+struct panthor_group_pool; -+struct panthor_heap_pool; -+struct panthor_job; -+struct panthor_mmu; -+struct panthor_fw; -+struct panthor_perfcnt; -+struct panthor_vm; -+struct panthor_vm_pool; -+ -+/** -+ * enum panthor_device_pm_state - PM state -+ */ -+enum panthor_device_pm_state { -+ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDED: Device is suspended. */ -+ PANTHOR_DEVICE_PM_STATE_SUSPENDED = 0, -+ -+ /** @PANTHOR_DEVICE_PM_STATE_RESUMING: Device is being resumed. */ -+ PANTHOR_DEVICE_PM_STATE_RESUMING, -+ -+ /** @PANTHOR_DEVICE_PM_STATE_ACTIVE: Device is active. */ -+ PANTHOR_DEVICE_PM_STATE_ACTIVE, -+ -+ /** @PANTHOR_DEVICE_PM_STATE_SUSPENDING: Device is being suspended. */ -+ PANTHOR_DEVICE_PM_STATE_SUSPENDING, -+}; -+ -+/** -+ * struct panthor_irq - IRQ data -+ * -+ * Used to automate IRQ handling for the 3 different IRQs we have in this driver. -+ */ -+struct panthor_irq { -+ /** @ptdev: Panthor device */ -+ struct panthor_device *ptdev; -+ -+ /** @irq: IRQ number. */ -+ int irq; -+ -+ /** @mask: Current mask being applied to xxx_INT_MASK. */ -+ u32 mask; -+ -+ /** @suspended: Set to true when the IRQ is suspended. */ -+ atomic_t suspended; -+}; -+ -+/** -+ * struct panthor_device - Panthor device -+ */ -+struct panthor_device { -+ /** @base: Base drm_device. */ -+ struct drm_device base; -+ -+ /** @phys_addr: Physical address of the iomem region. */ -+ phys_addr_t phys_addr; -+ -+ /** @iomem: CPU mapping of the IOMEM region. */ -+ void __iomem *iomem; -+ -+ /** @clks: GPU clocks. */ -+ struct { -+ /** @core: Core clock. */ -+ struct clk *core; -+ -+ /** @stacks: Stacks clock. 
This clock is optional. */ -+ struct clk *stacks; -+ -+ /** @coregroup: Core group clock. This clock is optional. */ -+ struct clk *coregroup; -+ } clks; -+ -+ /** @coherent: True if the CPU/GPU are memory coherent. */ -+ bool coherent; -+ -+ /** @gpu_info: GPU information. */ -+ struct drm_panthor_gpu_info gpu_info; -+ -+ /** @csif_info: Command stream interface information. */ -+ struct drm_panthor_csif_info csif_info; -+ -+ /** @gpu: GPU management data. */ -+ struct panthor_gpu *gpu; -+ -+ /** @fw: FW management data. */ -+ struct panthor_fw *fw; -+ -+ /** @mmu: MMU management data. */ -+ struct panthor_mmu *mmu; -+ -+ /** @scheduler: Scheduler management data. */ -+ struct panthor_scheduler *scheduler; -+ -+ /** @devfreq: Device frequency scaling management data. */ -+ struct panthor_devfreq *devfreq; -+ -+ /** @unplug: Device unplug related fields. */ -+ struct { -+ /** @lock: Lock used to serialize unplug operations. */ -+ struct mutex lock; -+ -+ /** -+ * @done: Completion object signaled when the unplug -+ * operation is done. -+ */ -+ struct completion done; -+ } unplug; -+ -+ /** @reset: Reset related fields. */ -+ struct { -+ /** @wq: Ordered worqueud used to schedule reset operations. */ -+ struct workqueue_struct *wq; -+ -+ /** @work: Reset work. */ -+ struct work_struct work; -+ -+ /** @pending: Set to true if a reset is pending. */ -+ atomic_t pending; -+ } reset; -+ -+ /** @pm: Power management related data. */ -+ struct { -+ /** @state: Power state. */ -+ atomic_t state; -+ -+ /** -+ * @mmio_lock: Lock protecting MMIO userspace CPU mappings. -+ * -+ * This is needed to ensure we map the dummy IO pages when -+ * the device is being suspended, and the real IO pages when -+ * the device is being resumed. We can't just do with the -+ * state atomicity to deal with this race. -+ */ -+ struct mutex mmio_lock; -+ -+ /** -+ * @dummy_latest_flush: Dummy LATEST_FLUSH page. -+ * -+ * Used to replace the real LATEST_FLUSH page when the GPU -+ * is suspended. -+ */ -+ u32 *dummy_latest_flush; -+ } pm; -+}; -+ -+/** -+ * struct panthor_file - Panthor file -+ */ -+struct panthor_file { -+ /** @ptdev: Device attached to this file. */ -+ struct panthor_device *ptdev; -+ -+ /** @vms: VM pool attached to this file. */ -+ struct panthor_vm_pool *vms; -+ -+ /** @groups: Scheduling group pool attached to this file. */ -+ struct panthor_group_pool *groups; -+}; -+ -+int panthor_device_init(struct panthor_device *ptdev); -+void panthor_device_unplug(struct panthor_device *ptdev); -+ -+/** -+ * panthor_device_schedule_reset() - Schedules a reset operation -+ */ -+static inline void panthor_device_schedule_reset(struct panthor_device *ptdev) -+{ -+ if (!atomic_cmpxchg(&ptdev->reset.pending, 0, 1) && -+ atomic_read(&ptdev->pm.state) == PANTHOR_DEVICE_PM_STATE_ACTIVE) -+ queue_work(ptdev->reset.wq, &ptdev->reset.work); -+} -+ -+/** -+ * panthor_device_reset_is_pending() - Checks if a reset is pending. -+ * -+ * Return: true if a reset is pending, false otherwise. 
-+ */ -+static inline bool panthor_device_reset_is_pending(struct panthor_device *ptdev) -+{ -+ return atomic_read(&ptdev->reset.pending) != 0; -+} -+ -+int panthor_device_mmap_io(struct panthor_device *ptdev, -+ struct vm_area_struct *vma); -+ -+int panthor_device_resume(struct device *dev); -+int panthor_device_suspend(struct device *dev); -+ -+enum drm_panthor_exception_type { -+ DRM_PANTHOR_EXCEPTION_OK = 0x00, -+ DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, -+ DRM_PANTHOR_EXCEPTION_KABOOM = 0x05, -+ DRM_PANTHOR_EXCEPTION_EUREKA = 0x06, -+ DRM_PANTHOR_EXCEPTION_ACTIVE = 0x08, -+ DRM_PANTHOR_EXCEPTION_CS_RES_TERM = 0x0f, -+ DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT = 0x3f, -+ DRM_PANTHOR_EXCEPTION_CS_CONFIG_FAULT = 0x40, -+ DRM_PANTHOR_EXCEPTION_CS_ENDPOINT_FAULT = 0x44, -+ DRM_PANTHOR_EXCEPTION_CS_BUS_FAULT = 0x48, -+ DRM_PANTHOR_EXCEPTION_CS_INSTR_INVALID = 0x49, -+ DRM_PANTHOR_EXCEPTION_CS_CALL_STACK_OVERFLOW = 0x4a, -+ DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT = 0x4b, -+ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_PC = 0x50, -+ DRM_PANTHOR_EXCEPTION_INSTR_INVALID_ENC = 0x51, -+ DRM_PANTHOR_EXCEPTION_INSTR_BARRIER_FAULT = 0x55, -+ DRM_PANTHOR_EXCEPTION_DATA_INVALID_FAULT = 0x58, -+ DRM_PANTHOR_EXCEPTION_TILE_RANGE_FAULT = 0x59, -+ DRM_PANTHOR_EXCEPTION_ADDR_RANGE_FAULT = 0x5a, -+ DRM_PANTHOR_EXCEPTION_IMPRECISE_FAULT = 0x5b, -+ DRM_PANTHOR_EXCEPTION_OOM = 0x60, -+ DRM_PANTHOR_EXCEPTION_CSF_FW_INTERNAL_ERROR = 0x68, -+ DRM_PANTHOR_EXCEPTION_CSF_RES_EVICTION_TIMEOUT = 0x69, -+ DRM_PANTHOR_EXCEPTION_GPU_BUS_FAULT = 0x80, -+ DRM_PANTHOR_EXCEPTION_GPU_SHAREABILITY_FAULT = 0x88, -+ DRM_PANTHOR_EXCEPTION_SYS_SHAREABILITY_FAULT = 0x89, -+ DRM_PANTHOR_EXCEPTION_GPU_CACHEABILITY_FAULT = 0x8a, -+ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_0 = 0xc0, -+ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_1 = 0xc1, -+ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_2 = 0xc2, -+ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_3 = 0xc3, -+ DRM_PANTHOR_EXCEPTION_TRANSLATION_FAULT_4 = 0xc4, -+ DRM_PANTHOR_EXCEPTION_PERM_FAULT_0 = 0xc8, -+ DRM_PANTHOR_EXCEPTION_PERM_FAULT_1 = 0xc9, -+ DRM_PANTHOR_EXCEPTION_PERM_FAULT_2 = 0xca, -+ DRM_PANTHOR_EXCEPTION_PERM_FAULT_3 = 0xcb, -+ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_1 = 0xd9, -+ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_2 = 0xda, -+ DRM_PANTHOR_EXCEPTION_ACCESS_FLAG_3 = 0xdb, -+ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_IN = 0xe0, -+ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT0 = 0xe4, -+ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT1 = 0xe5, -+ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT2 = 0xe6, -+ DRM_PANTHOR_EXCEPTION_ADDR_SIZE_FAULT_OUT3 = 0xe7, -+ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_0 = 0xe8, -+ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_1 = 0xe9, -+ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_2 = 0xea, -+ DRM_PANTHOR_EXCEPTION_MEM_ATTR_FAULT_3 = 0xeb, -+}; -+ -+/** -+ * panthor_exception_is_fault() - Checks if an exception is a fault. -+ * -+ * Return: true if the exception is a fault, false otherwise. -+ */ -+static inline bool -+panthor_exception_is_fault(u32 exception_code) -+{ -+ return exception_code > DRM_PANTHOR_EXCEPTION_MAX_NON_FAULT; -+} -+ -+const char *panthor_exception_name(struct panthor_device *ptdev, -+ u32 exception_code); -+ -+/** -+ * PANTHOR_IRQ_HANDLER() - Define interrupt handlers and the interrupt -+ * registration function. -+ * -+ * The boiler-plate to gracefully deal with shared interrupts is -+ * auto-generated. All you have to do is call PANTHOR_IRQ_HANDLER() -+ * just after the actual handler. 
The handler prototype is: -+ * -+ * void (*handler)(struct panthor_device *, u32 status); -+ */ -+#define PANTHOR_IRQ_HANDLER(__name, __reg_prefix, __handler) \ -+static irqreturn_t panthor_ ## __name ## _irq_raw_handler(int irq, void *data) \ -+{ \ -+ struct panthor_irq *pirq = data; \ -+ struct panthor_device *ptdev = pirq->ptdev; \ -+ \ -+ if (atomic_read(&pirq->suspended)) \ -+ return IRQ_NONE; \ -+ if (!gpu_read(ptdev, __reg_prefix ## _INT_STAT)) \ -+ return IRQ_NONE; \ -+ \ -+ gpu_write(ptdev, __reg_prefix ## _INT_MASK, 0); \ -+ return IRQ_WAKE_THREAD; \ -+} \ -+ \ -+static irqreturn_t panthor_ ## __name ## _irq_threaded_handler(int irq, void *data) \ -+{ \ -+ struct panthor_irq *pirq = data; \ -+ struct panthor_device *ptdev = pirq->ptdev; \ -+ irqreturn_t ret = IRQ_NONE; \ -+ \ -+ while (true) { \ -+ u32 status = gpu_read(ptdev, __reg_prefix ## _INT_RAWSTAT) & pirq->mask; \ -+ \ -+ if (!status) \ -+ break; \ -+ \ -+ gpu_write(ptdev, __reg_prefix ## _INT_CLEAR, status); \ -+ \ -+ __handler(ptdev, status); \ -+ ret = IRQ_HANDLED; \ -+ } \ -+ \ -+ if (!atomic_read(&pirq->suspended)) \ -+ gpu_write(ptdev, __reg_prefix ## _INT_MASK, pirq->mask); \ -+ \ -+ return ret; \ -+} \ -+ \ -+static inline void panthor_ ## __name ## _irq_suspend(struct panthor_irq *pirq) \ -+{ \ -+ int cookie; \ -+ \ -+ atomic_set(&pirq->suspended, true); \ -+ \ -+ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ -+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, 0); \ -+ synchronize_irq(pirq->irq); \ -+ drm_dev_exit(cookie); \ -+ } \ -+ \ -+ pirq->mask = 0; \ -+} \ -+ \ -+static inline void panthor_ ## __name ## _irq_resume(struct panthor_irq *pirq, u32 mask) \ -+{ \ -+ int cookie; \ -+ \ -+ atomic_set(&pirq->suspended, false); \ -+ pirq->mask = mask; \ -+ \ -+ if (drm_dev_enter(&pirq->ptdev->base, &cookie)) { \ -+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_CLEAR, mask); \ -+ gpu_write(pirq->ptdev, __reg_prefix ## _INT_MASK, mask); \ -+ drm_dev_exit(cookie); \ -+ } \ -+} \ -+ \ -+static int panthor_request_ ## __name ## _irq(struct panthor_device *ptdev, \ -+ struct panthor_irq *pirq, \ -+ int irq, u32 mask) \ -+{ \ -+ pirq->ptdev = ptdev; \ -+ pirq->irq = irq; \ -+ panthor_ ## __name ## _irq_resume(pirq, mask); \ -+ \ -+ return devm_request_threaded_irq(ptdev->base.dev, irq, \ -+ panthor_ ## __name ## _irq_raw_handler, \ -+ panthor_ ## __name ## _irq_threaded_handler, \ -+ IRQF_SHARED, KBUILD_MODNAME "-" # __name, \ -+ pirq); \ -+} -+ -+/** -+ * panthor_device_mmio_offset() - Turn a user MMIO offset into a kernel one -+ * @offset: Offset to convert. -+ * -+ * With 32-bit systems being limited by the 32-bit representation of mmap2's -+ * pgoffset field, we need to make the MMIO offset arch specific. This function -+ * converts a user MMIO offset into something the kernel driver understands. -+ * -+ * If the kernel and userspace architecture match, the offset is unchanged. If -+ * the kernel is 64-bit and userspace is 32-bit, the offset is adjusted to match -+ * 64-bit offsets. 32-bit kernel with 64-bit userspace is impossible. -+ * -+ * Return: Adjusted offset. 
-+ */ -+static inline u64 panthor_device_mmio_offset(u64 offset) -+{ -+#ifdef CONFIG_ARM64 -+ if (test_tsk_thread_flag(current, TIF_32BIT)) -+ offset += DRM_PANTHOR_USER_MMIO_OFFSET_64BIT - DRM_PANTHOR_USER_MMIO_OFFSET_32BIT; -+#endif -+ -+ return offset; -+} -+ -+extern struct workqueue_struct *panthor_cleanup_wq; -+ -+#endif --- -2.42.0 - - -From 4a51e03733c5cfc818cbb9a55507b10c16c1b243 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:35 +0100 -Subject: [PATCH 54/81] drm/panthor: Add the GPU logical block - -Handles everything that's not related to the FW, the MMU or the -scheduler. This is the block dealing with the GPU property retrieval, -the GPU block power on/off logic, and some global operations, like -global cache flushing. - -v4: -- Expose CORE_FEATURES through DEV_QUERY - -v3: -- Add acks for the MIT/GPL2 relicensing -- Use macros to extract GPU ID info -- Make sure we reset clear pending_reqs bits when wait_event_timeout() - times out but the corresponding bit is cleared in GPU_INT_RAWSTAT - (can happen if the IRQ is masked or HW takes to long to call the IRQ - handler) -- GPU_MODEL now takes separate arch and product majors to be more - readable. -- Drop GPU_IRQ_MCU_STATUS_CHANGED from interrupt mask. -- Handle GPU_IRQ_PROTM_FAULT correctly (don't output registers that are - not updated for protected interrupts). -- Minor code tidy ups - -Cc: Alexey Sheplyakov # MIT+GPL2 relicensing -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Link: https://lore.kernel.org/r/20240122163047.1954733-5-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_gpu.c | 482 ++++++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_gpu.h | 52 +++ - 2 files changed, 534 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.c - create mode 100644 drivers/gpu/drm/panthor/panthor_gpu.h - -diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c -new file mode 100644 -index 000000000000..014e0f7c1b13 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_gpu.c -@@ -0,0 +1,482 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Linaro, Ltd., Rob Herring */ -+/* Copyright 2019 Collabora ltd. */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "panthor_device.h" -+#include "panthor_gpu.h" -+#include "panthor_regs.h" -+ -+/** -+ * struct panthor_gpu - GPU block management data. -+ */ -+struct panthor_gpu { -+ /** @irq: GPU irq. */ -+ struct panthor_irq irq; -+ -+ /** @reqs_lock: Lock protecting access to pending_reqs. */ -+ spinlock_t reqs_lock; -+ -+ /** @pending_reqs: Pending GPU requests. */ -+ u32 pending_reqs; -+ -+ /** @reqs_acked: GPU request wait queue. */ -+ wait_queue_head_t reqs_acked; -+}; -+ -+/** -+ * struct panthor_model - GPU model description -+ */ -+struct panthor_model { -+ /** @name: Model name. */ -+ const char *name; -+ -+ /** @arch_major: Major version number of architecture. */ -+ u8 arch_major; -+ -+ /** @product_major: Major version number of product. */ -+ u8 product_major; -+}; -+ -+/** -+ * GPU_MODEL() - Define a GPU model. 
A GPU product can be uniquely identified -+ * by a combination of the major architecture version and the major product -+ * version. -+ * @name: Name for the GPU model. -+ * @_arch_major: Architecture major. -+ * @_product_major: Product major. -+ */ -+#define GPU_MODEL(_name, _arch_major, _product_major) \ -+{\ -+ .name = __stringify(_name), \ -+ .arch_major = _arch_major, \ -+ .product_major = _product_major, \ -+} -+ -+static const struct panthor_model gpu_models[] = { -+ GPU_MODEL(g610, 10, 7), -+ {}, -+}; -+ -+#define GPU_INTERRUPTS_MASK \ -+ (GPU_IRQ_FAULT | \ -+ GPU_IRQ_PROTM_FAULT | \ -+ GPU_IRQ_RESET_COMPLETED | \ -+ GPU_IRQ_CLEAN_CACHES_COMPLETED) -+ -+static void panthor_gpu_init_info(struct panthor_device *ptdev) -+{ -+ const struct panthor_model *model; -+ u32 arch_major, product_major; -+ u32 major, minor, status; -+ unsigned int i; -+ -+ ptdev->gpu_info.gpu_id = gpu_read(ptdev, GPU_ID); -+ ptdev->gpu_info.csf_id = gpu_read(ptdev, GPU_CSF_ID); -+ ptdev->gpu_info.gpu_rev = gpu_read(ptdev, GPU_REVID); -+ ptdev->gpu_info.core_features = gpu_read(ptdev, GPU_CORE_FEATURES); -+ ptdev->gpu_info.l2_features = gpu_read(ptdev, GPU_L2_FEATURES); -+ ptdev->gpu_info.tiler_features = gpu_read(ptdev, GPU_TILER_FEATURES); -+ ptdev->gpu_info.mem_features = gpu_read(ptdev, GPU_MEM_FEATURES); -+ ptdev->gpu_info.mmu_features = gpu_read(ptdev, GPU_MMU_FEATURES); -+ ptdev->gpu_info.thread_features = gpu_read(ptdev, GPU_THREAD_FEATURES); -+ ptdev->gpu_info.max_threads = gpu_read(ptdev, GPU_THREAD_MAX_THREADS); -+ ptdev->gpu_info.thread_max_workgroup_size = gpu_read(ptdev, GPU_THREAD_MAX_WORKGROUP_SIZE); -+ ptdev->gpu_info.thread_max_barrier_size = gpu_read(ptdev, GPU_THREAD_MAX_BARRIER_SIZE); -+ ptdev->gpu_info.coherency_features = gpu_read(ptdev, GPU_COHERENCY_FEATURES); -+ for (i = 0; i < 4; i++) -+ ptdev->gpu_info.texture_features[i] = gpu_read(ptdev, GPU_TEXTURE_FEATURES(i)); -+ -+ ptdev->gpu_info.as_present = gpu_read(ptdev, GPU_AS_PRESENT); -+ -+ ptdev->gpu_info.shader_present = gpu_read(ptdev, GPU_SHADER_PRESENT_LO); -+ ptdev->gpu_info.shader_present |= (u64)gpu_read(ptdev, GPU_SHADER_PRESENT_HI) << 32; -+ -+ ptdev->gpu_info.tiler_present = gpu_read(ptdev, GPU_TILER_PRESENT_LO); -+ ptdev->gpu_info.tiler_present |= (u64)gpu_read(ptdev, GPU_TILER_PRESENT_HI) << 32; -+ -+ ptdev->gpu_info.l2_present = gpu_read(ptdev, GPU_L2_PRESENT_LO); -+ ptdev->gpu_info.l2_present |= (u64)gpu_read(ptdev, GPU_L2_PRESENT_HI) << 32; -+ -+ arch_major = GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id); -+ product_major = GPU_PROD_MAJOR(ptdev->gpu_info.gpu_id); -+ major = GPU_VER_MAJOR(ptdev->gpu_info.gpu_id); -+ minor = GPU_VER_MINOR(ptdev->gpu_info.gpu_id); -+ status = GPU_VER_STATUS(ptdev->gpu_info.gpu_id); -+ -+ for (model = gpu_models; model->name; model++) { -+ if (model->arch_major == arch_major && -+ model->product_major == product_major) -+ break; -+ } -+ -+ drm_info(&ptdev->base, -+ "mali-%s id 0x%x major 0x%x minor 0x%x status 0x%x", -+ model->name ?: "unknown", ptdev->gpu_info.gpu_id >> 16, -+ major, minor, status); -+ -+ drm_info(&ptdev->base, -+ "Features: L2:%#x Tiler:%#x Mem:%#x MMU:%#x AS:%#x", -+ ptdev->gpu_info.l2_features, -+ ptdev->gpu_info.tiler_features, -+ ptdev->gpu_info.mem_features, -+ ptdev->gpu_info.mmu_features, -+ ptdev->gpu_info.as_present); -+ -+ drm_info(&ptdev->base, -+ "shader_present=0x%0llx l2_present=0x%0llx tiler_present=0x%0llx", -+ ptdev->gpu_info.shader_present, ptdev->gpu_info.l2_present, -+ ptdev->gpu_info.tiler_present); -+} -+ -+static void panthor_gpu_irq_handler(struct 
panthor_device *ptdev, u32 status) -+{ -+ if (status & GPU_IRQ_FAULT) { -+ u32 fault_status = gpu_read(ptdev, GPU_FAULT_STATUS); -+ u64 address = ((u64)gpu_read(ptdev, GPU_FAULT_ADDR_HI) << 32) | -+ gpu_read(ptdev, GPU_FAULT_ADDR_LO); -+ -+ drm_warn(&ptdev->base, "GPU Fault 0x%08x (%s) at 0x%016llx\n", -+ fault_status, panthor_exception_name(ptdev, fault_status & 0xFF), -+ address); -+ } -+ if (status & GPU_IRQ_PROTM_FAULT) -+ drm_warn(&ptdev->base, "GPU Fault in protected mode\n"); -+ -+ spin_lock(&ptdev->gpu->reqs_lock); -+ if (status & ptdev->gpu->pending_reqs) { -+ ptdev->gpu->pending_reqs &= ~status; -+ wake_up_all(&ptdev->gpu->reqs_acked); -+ } -+ spin_unlock(&ptdev->gpu->reqs_lock); -+} -+PANTHOR_IRQ_HANDLER(gpu, GPU, panthor_gpu_irq_handler); -+ -+/** -+ * panthor_gpu_unplug() - Called when the GPU is unplugged. -+ * @ptdev: Device to unplug. -+ */ -+void panthor_gpu_unplug(struct panthor_device *ptdev) -+{ -+ unsigned long flags; -+ -+ /* Make sure the IRQ handler is not running after that point. */ -+ panthor_gpu_irq_suspend(&ptdev->gpu->irq); -+ -+ /* Wake-up all waiters. */ -+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); -+ ptdev->gpu->pending_reqs = 0; -+ wake_up_all(&ptdev->gpu->reqs_acked); -+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); -+} -+ -+/** -+ * panthor_gpu_init() - Initialize the GPU block -+ * @ptdev: Device. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_gpu_init(struct panthor_device *ptdev) -+{ -+ struct panthor_gpu *gpu; -+ u32 pa_bits; -+ int ret, irq; -+ -+ gpu = drmm_kzalloc(&ptdev->base, sizeof(*gpu), GFP_KERNEL); -+ if (!gpu) -+ return -ENOMEM; -+ -+ spin_lock_init(&gpu->reqs_lock); -+ init_waitqueue_head(&gpu->reqs_acked); -+ ptdev->gpu = gpu; -+ panthor_gpu_init_info(ptdev); -+ -+ dma_set_max_seg_size(ptdev->base.dev, UINT_MAX); -+ pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); -+ ret = dma_set_mask_and_coherent(ptdev->base.dev, DMA_BIT_MASK(pa_bits)); -+ if (ret) -+ return ret; -+ -+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "gpu"); -+ if (irq <= 0) -+ return ret; -+ -+ ret = panthor_request_gpu_irq(ptdev, &ptdev->gpu->irq, irq, GPU_INTERRUPTS_MASK); -+ if (ret) -+ return ret; -+ -+ return 0; -+} -+ -+/** -+ * panthor_gpu_block_power_off() - Power-off a specific block of the GPU -+ * @ptdev: Device. -+ * @blk_name: Block name. -+ * @pwroff_reg: Power-off register for this block. -+ * @pwrtrans_reg: Power transition register for this block. -+ * @mask: Sub-elements to power-off. -+ * @timeout_us: Timeout in microseconds. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+int panthor_gpu_block_power_off(struct panthor_device *ptdev, -+ const char *blk_name, -+ u32 pwroff_reg, u32 pwrtrans_reg, -+ u64 mask, u32 timeout_us) -+{ -+ u32 val, i; -+ int ret; -+ -+ for (i = 0; i < 2; i++) { -+ u32 mask32 = mask >> (i * 32); -+ -+ if (!mask32) -+ continue; -+ -+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), -+ val, !(mask32 & val), -+ 100, timeout_us); -+ if (ret) { -+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", -+ blk_name, mask); -+ return ret; -+ } -+ } -+ -+ if (mask & GENMASK(31, 0)) -+ gpu_write(ptdev, pwroff_reg, mask); -+ -+ if (mask >> 32) -+ gpu_write(ptdev, pwroff_reg + 4, mask >> 32); -+ -+ for (i = 0; i < 2; i++) { -+ u32 mask32 = mask >> (i * 32); -+ -+ if (!mask32) -+ continue; -+ -+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), -+ val, !(mask & val), -+ 100, timeout_us); -+ if (ret) { -+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", -+ blk_name, mask); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_gpu_block_power_on() - Power-on a specific block of the GPU -+ * @ptdev: Device. -+ * @blk_name: Block name. -+ * @pwron_reg: Power-on register for this block. -+ * @pwrtrans_reg: Power transition register for this block. -+ * @rdy_reg: Power transition ready register. -+ * @mask: Sub-elements to power-on. -+ * @timeout_us: Timeout in microseconds. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_gpu_block_power_on(struct panthor_device *ptdev, -+ const char *blk_name, -+ u32 pwron_reg, u32 pwrtrans_reg, -+ u32 rdy_reg, u64 mask, u32 timeout_us) -+{ -+ u32 val, i; -+ int ret; -+ -+ for (i = 0; i < 2; i++) { -+ u32 mask32 = mask >> (i * 32); -+ -+ if (!mask32) -+ continue; -+ -+ ret = readl_relaxed_poll_timeout(ptdev->iomem + pwrtrans_reg + (i * 4), -+ val, !(mask32 & val), -+ 100, timeout_us); -+ if (ret) { -+ drm_err(&ptdev->base, "timeout waiting on %s:%llx power transition", -+ blk_name, mask); -+ return ret; -+ } -+ } -+ -+ if (mask & GENMASK(31, 0)) -+ gpu_write(ptdev, pwron_reg, mask); -+ -+ if (mask >> 32) -+ gpu_write(ptdev, pwron_reg + 4, mask >> 32); -+ -+ for (i = 0; i < 2; i++) { -+ u32 mask32 = mask >> (i * 32); -+ -+ if (!mask32) -+ continue; -+ -+ ret = readl_relaxed_poll_timeout(ptdev->iomem + rdy_reg + (i * 4), -+ val, (mask32 & val) == mask32, -+ 100, timeout_us); -+ if (ret) { -+ drm_err(&ptdev->base, "timeout waiting on %s:%llx readyness", -+ blk_name, mask); -+ return ret; -+ } -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_gpu_l2_power_on() - Power-on the L2-cache -+ * @ptdev: Device. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_gpu_l2_power_on(struct panthor_device *ptdev) -+{ -+ if (ptdev->gpu_info.l2_present != 1) { -+ /* -+ * Only support one core group now. -+ * ~(l2_present - 1) unsets all bits in l2_present except -+ * the bottom bit. (l2_present - 2) has all the bits in -+ * the first core group set. AND them together to generate -+ * a mask of cores in the first core group. -+ */ -+ u64 core_mask = ~(ptdev->gpu_info.l2_present - 1) & -+ (ptdev->gpu_info.l2_present - 2); -+ drm_info_once(&ptdev->base, "using only 1st core group (%lu cores from %lu)\n", -+ hweight64(core_mask), -+ hweight64(ptdev->gpu_info.shader_present)); -+ } -+ -+ return panthor_gpu_power_on(ptdev, L2, 1, 20000); -+} -+ -+/** -+ * panthor_gpu_flush_caches() - Flush caches -+ * @ptdev: Device. -+ * @l2: L2 flush type. -+ * @lsc: LSC flush type. 
-+ * @other: Other flush type. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_gpu_flush_caches(struct panthor_device *ptdev, -+ u32 l2, u32 lsc, u32 other) -+{ -+ bool timedout = false; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); -+ if (!drm_WARN_ON(&ptdev->base, -+ ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED)) { -+ ptdev->gpu->pending_reqs |= GPU_IRQ_CLEAN_CACHES_COMPLETED; -+ gpu_write(ptdev, GPU_CMD, GPU_FLUSH_CACHES(l2, lsc, other)); -+ } -+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); -+ -+ if (!wait_event_timeout(ptdev->gpu->reqs_acked, -+ !(ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED), -+ msecs_to_jiffies(100))) { -+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); -+ if ((ptdev->gpu->pending_reqs & GPU_IRQ_CLEAN_CACHES_COMPLETED) != 0 && -+ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_CLEAN_CACHES_COMPLETED)) -+ timedout = true; -+ else -+ ptdev->gpu->pending_reqs &= ~GPU_IRQ_CLEAN_CACHES_COMPLETED; -+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); -+ } -+ -+ if (timedout) { -+ drm_err(&ptdev->base, "Flush caches timeout"); -+ return -ETIMEDOUT; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_gpu_soft_reset() - Issue a soft-reset -+ * @ptdev: Device. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_gpu_soft_reset(struct panthor_device *ptdev) -+{ -+ bool timedout = false; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); -+ if (!drm_WARN_ON(&ptdev->base, -+ ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED)) { -+ ptdev->gpu->pending_reqs |= GPU_IRQ_RESET_COMPLETED; -+ gpu_write(ptdev, GPU_INT_CLEAR, GPU_IRQ_RESET_COMPLETED); -+ gpu_write(ptdev, GPU_CMD, GPU_SOFT_RESET); -+ } -+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); -+ -+ if (!wait_event_timeout(ptdev->gpu->reqs_acked, -+ !(ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED), -+ msecs_to_jiffies(100))) { -+ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); -+ if ((ptdev->gpu->pending_reqs & GPU_IRQ_RESET_COMPLETED) != 0 && -+ !(gpu_read(ptdev, GPU_INT_RAWSTAT) & GPU_IRQ_RESET_COMPLETED)) -+ timedout = true; -+ else -+ ptdev->gpu->pending_reqs &= ~GPU_IRQ_RESET_COMPLETED; -+ spin_unlock_irqrestore(&ptdev->gpu->reqs_lock, flags); -+ } -+ -+ if (timedout) { -+ drm_err(&ptdev->base, "Soft reset timeout"); -+ return -ETIMEDOUT; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_gpu_suspend() - Suspend the GPU block. -+ * @ptdev: Device. -+ * -+ * Suspend the GPU irq. This should be called last in the suspend procedure, -+ * after all other blocks have been suspented. -+ */ -+void panthor_gpu_suspend(struct panthor_device *ptdev) -+{ -+ /* -+ * It may be preferable to simply power down the L2, but for now just -+ * soft-reset which will leave the L2 powered down. -+ */ -+ panthor_gpu_soft_reset(ptdev); -+ panthor_gpu_irq_suspend(&ptdev->gpu->irq); -+} -+ -+/** -+ * panthor_gpu_resume() - Resume the GPU block. -+ * @ptdev: Device. -+ * -+ * Resume the IRQ handler and power-on the L2-cache. -+ * The FW takes care of powering the other blocks. 
-+ */ -+void panthor_gpu_resume(struct panthor_device *ptdev) -+{ -+ panthor_gpu_irq_resume(&ptdev->gpu->irq, GPU_INTERRUPTS_MASK); -+ panthor_gpu_l2_power_on(ptdev); -+} -diff --git a/drivers/gpu/drm/panthor/panthor_gpu.h b/drivers/gpu/drm/panthor/panthor_gpu.h -new file mode 100644 -index 000000000000..bba7555dd3c6 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_gpu.h -@@ -0,0 +1,52 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Collabora ltd. */ -+ -+#ifndef __PANTHOR_GPU_H__ -+#define __PANTHOR_GPU_H__ -+ -+struct panthor_device; -+ -+int panthor_gpu_init(struct panthor_device *ptdev); -+void panthor_gpu_unplug(struct panthor_device *ptdev); -+void panthor_gpu_suspend(struct panthor_device *ptdev); -+void panthor_gpu_resume(struct panthor_device *ptdev); -+ -+int panthor_gpu_block_power_on(struct panthor_device *ptdev, -+ const char *blk_name, -+ u32 pwron_reg, u32 pwrtrans_reg, -+ u32 rdy_reg, u64 mask, u32 timeout_us); -+int panthor_gpu_block_power_off(struct panthor_device *ptdev, -+ const char *blk_name, -+ u32 pwroff_reg, u32 pwrtrans_reg, -+ u64 mask, u32 timeout_us); -+ -+/** -+ * panthor_gpu_power_on() - Power on the GPU block. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+#define panthor_gpu_power_on(ptdev, type, mask, timeout_us) \ -+ panthor_gpu_block_power_on(ptdev, #type, \ -+ type ## _PWRON_LO, \ -+ type ## _PWRTRANS_LO, \ -+ type ## _READY_LO, \ -+ mask, timeout_us) -+ -+/** -+ * panthor_gpu_power_off() - Power off the GPU block. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+#define panthor_gpu_power_off(ptdev, type, mask, timeout_us) \ -+ panthor_gpu_block_power_off(ptdev, #type, \ -+ type ## _PWROFF_LO, \ -+ type ## _PWRTRANS_LO, \ -+ mask, timeout_us) -+ -+int panthor_gpu_l2_power_on(struct panthor_device *ptdev); -+int panthor_gpu_flush_caches(struct panthor_device *ptdev, -+ u32 l2, u32 lsc, u32 other); -+int panthor_gpu_soft_reset(struct panthor_device *ptdev); -+ -+#endif --- -2.42.0 - - -From 6afc90ee786da2b96e4d2f316fec4919ad033abd Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:36 +0100 -Subject: [PATCH 55/81] drm/panthor: Add GEM logical block - -Anything relating to GEM object management is placed here. Nothing -particularly interesting here, given the implementation is based on -drm_gem_shmem_object, which is doing most of the work. 
- -v4: -- Force kernel BOs to be GPU mapped -- Make panthor_kernel_bo_destroy() robust against ERR/NULL BO pointers - to simplify the call sites - -v3: -- Add acks for the MIT/GPL2 relicensing -- Provide a panthor_kernel_bo abstraction for buffer objects managed by - the kernel (will replace panthor_fw_mem and be used everywhere we were - using panthor_gem_create_and_map() before) -- Adjust things to match drm_gpuvm changes -- Change return of panthor_gem_create_with_handle() to int - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Link: https://lore.kernel.org/r/20240122163047.1954733-6-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_gem.c | 228 ++++++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_gem.h | 144 ++++++++++++++++ - 2 files changed, 372 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_gem.c - create mode 100644 drivers/gpu/drm/panthor/panthor_gem.h - -diff --git a/drivers/gpu/drm/panthor/panthor_gem.c b/drivers/gpu/drm/panthor/panthor_gem.c -new file mode 100644 -index 000000000000..42e342fcad19 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_gem.c -@@ -0,0 +1,228 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. */ -+ -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "panthor_device.h" -+#include "panthor_gem.h" -+#include "panthor_mmu.h" -+ -+static void panthor_gem_free_object(struct drm_gem_object *obj) -+{ -+ struct panthor_gem_object *bo = to_panthor_bo(obj); -+ struct drm_gem_object *vm_root_gem = bo->exclusive_vm_root_gem; -+ -+ drm_gem_free_mmap_offset(&bo->base.base); -+ mutex_destroy(&bo->gpuva_list_lock); -+ drm_gem_shmem_free(&bo->base); -+ drm_gem_object_put(vm_root_gem); -+} -+ -+/** -+ * panthor_kernel_bo_destroy() - Destroy a kernel buffer object -+ * @vm: The VM this BO was mapped to. -+ * @bo: Kernel buffer object to destroy. If NULL or an ERR_PTR(), the destruction -+ * is skipped. -+ */ -+void panthor_kernel_bo_destroy(struct panthor_vm *vm, -+ struct panthor_kernel_bo *bo) -+{ -+ int ret; -+ -+ if (IS_ERR_OR_NULL(bo)) -+ return; -+ -+ panthor_kernel_bo_vunmap(bo); -+ -+ if (drm_WARN_ON(bo->obj->dev, -+ to_panthor_bo(bo->obj)->exclusive_vm_root_gem != panthor_vm_root_gem(vm))) -+ goto out_free_bo; -+ -+ ret = panthor_vm_unmap_range(vm, bo->va_node.start, -+ panthor_kernel_bo_size(bo)); -+ if (ret) -+ goto out_free_bo; -+ -+ panthor_vm_free_va(vm, &bo->va_node); -+ drm_gem_object_put(bo->obj); -+ -+out_free_bo: -+ kfree(bo); -+} -+ -+/** -+ * panthor_kernel_bo_create() - Create and map a GEM object to a VM -+ * @ptdev: Device. -+ * @vm: VM to map the GEM to. If NULL, the kernel object is not GPU mapped. -+ * @size: Size of the buffer object. -+ * @bo_flags: Combination of drm_panthor_bo_flags flags. -+ * @vm_map_flags: Combination of drm_panthor_vm_bind_op_flags (only those -+ * that are related to map operations). -+ * @gpu_va: GPU address assigned when mapping to the VM. -+ * If gpu_va == PANTHOR_VM_KERNEL_AUTO_VA, the virtual address will be -+ * automatically allocated. -+ * -+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. 
-+ */ -+struct panthor_kernel_bo * -+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, -+ size_t size, u32 bo_flags, u32 vm_map_flags, -+ u64 gpu_va) -+{ -+ struct drm_gem_shmem_object *obj; -+ struct panthor_kernel_bo *kbo; -+ struct panthor_gem_object *bo; -+ int ret; -+ -+ if (drm_WARN_ON(&ptdev->base, !vm)) -+ return ERR_PTR(-EINVAL); -+ -+ kbo = kzalloc(sizeof(*kbo), GFP_KERNEL); -+ if (!kbo) -+ return ERR_PTR(-ENOMEM); -+ -+ obj = drm_gem_shmem_create(&ptdev->base, size); -+ if (IS_ERR(obj)) { -+ ret = PTR_ERR(obj); -+ goto err_free_bo; -+ } -+ -+ bo = to_panthor_bo(&obj->base); -+ size = obj->base.size; -+ kbo->obj = &obj->base; -+ bo->flags = bo_flags; -+ -+ ret = panthor_vm_alloc_va(vm, gpu_va, size, &kbo->va_node); -+ if (ret) -+ goto err_put_obj; -+ -+ ret = panthor_vm_map_bo_range(vm, bo, 0, size, kbo->va_node.start, vm_map_flags); -+ if (ret) -+ goto err_free_va; -+ -+ bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm); -+ drm_gem_object_get(bo->exclusive_vm_root_gem); -+ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; -+ return kbo; -+ -+err_free_va: -+ panthor_vm_free_va(vm, &kbo->va_node); -+ -+err_put_obj: -+ drm_gem_object_put(&obj->base); -+ -+err_free_bo: -+ kfree(kbo); -+ return ERR_PTR(ret); -+} -+ -+static int panthor_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) -+{ -+ struct panthor_gem_object *bo = to_panthor_bo(obj); -+ -+ /* Don't allow mmap on objects that have the NO_MMAP flag set. */ -+ if (bo->flags & DRM_PANTHOR_BO_NO_MMAP) -+ return -EINVAL; -+ -+ return drm_gem_shmem_object_mmap(obj, vma); -+} -+ -+static struct dma_buf * -+panthor_gem_prime_export(struct drm_gem_object *obj, int flags) -+{ -+ /* We can't export GEMs that have an exclusive VM. */ -+ if (to_panthor_bo(obj)->exclusive_vm_root_gem) -+ return ERR_PTR(-EINVAL); -+ -+ return drm_gem_prime_export(obj, flags); -+} -+ -+static const struct drm_gem_object_funcs panthor_gem_funcs = { -+ .free = panthor_gem_free_object, -+ .print_info = drm_gem_shmem_object_print_info, -+ .pin = drm_gem_shmem_object_pin, -+ .unpin = drm_gem_shmem_object_unpin, -+ .get_sg_table = drm_gem_shmem_object_get_sg_table, -+ .vmap = drm_gem_shmem_object_vmap, -+ .vunmap = drm_gem_shmem_object_vunmap, -+ .mmap = panthor_gem_mmap, -+ .export = panthor_gem_prime_export, -+ .vm_ops = &drm_gem_shmem_vm_ops, -+}; -+ -+/** -+ * panthor_gem_create_object - Implementation of driver->gem_create_object. -+ * @ddev: DRM device -+ * @size: Size in bytes of the memory the object will reference -+ * -+ * This lets the GEM helpers allocate object structs for us, and keep -+ * our BO stats correct. -+ */ -+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size) -+{ -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ struct panthor_gem_object *obj; -+ -+ obj = kzalloc(sizeof(*obj), GFP_KERNEL); -+ if (!obj) -+ return ERR_PTR(-ENOMEM); -+ -+ obj->base.base.funcs = &panthor_gem_funcs; -+ obj->base.map_wc = !ptdev->coherent; -+ mutex_init(&obj->gpuva_list_lock); -+ drm_gem_gpuva_set_lock(&obj->base.base, &obj->gpuva_list_lock); -+ -+ return &obj->base.base; -+} -+ -+/** -+ * panthor_gem_create_with_handle() - Create a GEM object and attach it to a handle. -+ * @file: DRM file. -+ * @ddev: DRM device. -+ * @exclusive_vm: Exclusive VM. Not NULL if the GEM object can't be shared. -+ * @size: Size of the GEM object to allocate. -+ * @flags: Combination of drm_panthor_bo_flags flags. 
-+ * @handle: Pointer holding the handle pointing to the new GEM object. -+ * -+ * Return: Zero on success -+ */ -+int -+panthor_gem_create_with_handle(struct drm_file *file, -+ struct drm_device *ddev, -+ struct panthor_vm *exclusive_vm, -+ size_t size, -+ u32 flags, u32 *handle) -+{ -+ int ret; -+ struct drm_gem_shmem_object *shmem; -+ struct panthor_gem_object *bo; -+ -+ shmem = drm_gem_shmem_create(ddev, size); -+ if (IS_ERR(shmem)) -+ return PTR_ERR(shmem); -+ -+ bo = to_panthor_bo(&shmem->base); -+ bo->flags = flags; -+ -+ if (exclusive_vm) { -+ bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm); -+ drm_gem_object_get(bo->exclusive_vm_root_gem); -+ bo->base.base.resv = bo->exclusive_vm_root_gem->resv; -+ } -+ -+ /* -+ * Allocate an id of idr table where the obj is registered -+ * and handle has the id what user can see. -+ */ -+ ret = drm_gem_handle_create(file, &shmem->base, handle); -+ /* drop reference from allocate - handle holds it now. */ -+ drm_gem_object_put(&shmem->base); -+ -+ return ret; -+} -diff --git a/drivers/gpu/drm/panthor/panthor_gem.h b/drivers/gpu/drm/panthor/panthor_gem.h -new file mode 100644 -index 000000000000..6c8010ceb641 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_gem.h -@@ -0,0 +1,144 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. */ -+ -+#ifndef __PANTHOR_GEM_H__ -+#define __PANTHOR_GEM_H__ -+ -+#include -+#include -+ -+#include -+#include -+ -+struct panthor_vm; -+ -+/** -+ * struct panthor_gem_object - Driver specific GEM object. -+ */ -+struct panthor_gem_object { -+ /** @base: Inherit from drm_gem_shmem_object. */ -+ struct drm_gem_shmem_object base; -+ -+ /** -+ * @exclusive_vm_root_gem: Root GEM of the exclusive VM this GEM object -+ * is attached to. -+ * -+ * If @exclusive_vm_root_gem != NULL, any attempt to bind the GEM to a -+ * different VM will fail. -+ * -+ * All FW memory objects have this field set to the root GEM of the MCU -+ * VM. -+ */ -+ struct drm_gem_object *exclusive_vm_root_gem; -+ -+ /** -+ * @gpuva_list_lock: Custom GPUVA lock. -+ * -+ * Used to protect insertion of drm_gpuva elements to the -+ * drm_gem_object.gpuva.list list. -+ * -+ * We can't use the GEM resv for that, because drm_gpuva_link() is -+ * called in a dma-signaling path, where we're not allowed to take -+ * resv locks. -+ */ -+ struct mutex gpuva_list_lock; -+ -+ /** @flags: Combination of drm_panthor_bo_flags flags. */ -+ u32 flags; -+}; -+ -+/** -+ * struct panthor_kernel_bo - Kernel buffer object. -+ * -+ * These objects are only manipulated by the kernel driver and not -+ * directly exposed to the userspace. The GPU address of a kernel -+ * BO might be passed to userspace though. -+ */ -+struct panthor_kernel_bo { -+ /** -+ * @obj: The GEM object backing this kernel buffer object. -+ */ -+ struct drm_gem_object *obj; -+ -+ /** -+ * @va_node: VA space allocated to this GEM. -+ */ -+ struct drm_mm_node va_node; -+ -+ /** -+ * @kmap: Kernel CPU mapping of @gem. 
-+ */ -+ void *kmap; -+}; -+ -+static inline -+struct panthor_gem_object *to_panthor_bo(struct drm_gem_object *obj) -+{ -+ return container_of(to_drm_gem_shmem_obj(obj), struct panthor_gem_object, base); -+} -+ -+struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size); -+ -+struct drm_gem_object * -+panthor_gem_prime_import_sg_table(struct drm_device *ddev, -+ struct dma_buf_attachment *attach, -+ struct sg_table *sgt); -+ -+int -+panthor_gem_create_with_handle(struct drm_file *file, -+ struct drm_device *ddev, -+ struct panthor_vm *exclusive_vm, -+ size_t size, -+ u32 flags, -+ uint32_t *handle); -+ -+static inline u64 -+panthor_kernel_bo_gpuva(struct panthor_kernel_bo *bo) -+{ -+ return bo->va_node.start; -+} -+ -+static inline size_t -+panthor_kernel_bo_size(struct panthor_kernel_bo *bo) -+{ -+ return bo->obj->size; -+} -+ -+static inline int -+panthor_kernel_bo_vmap(struct panthor_kernel_bo *bo) -+{ -+ struct iosys_map map; -+ int ret; -+ -+ if (bo->kmap) -+ return 0; -+ -+ ret = drm_gem_vmap_unlocked(bo->obj, &map); -+ if (ret) -+ return ret; -+ -+ bo->kmap = map.vaddr; -+ return 0; -+} -+ -+static inline void -+panthor_kernel_bo_vunmap(struct panthor_kernel_bo *bo) -+{ -+ if (bo->kmap) { -+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->kmap); -+ -+ drm_gem_vunmap_unlocked(bo->obj, &map); -+ bo->kmap = NULL; -+ } -+} -+ -+struct panthor_kernel_bo * -+panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm, -+ size_t size, u32 bo_flags, u32 vm_map_flags, -+ u64 gpu_va); -+ -+void panthor_kernel_bo_destroy(struct panthor_vm *vm, -+ struct panthor_kernel_bo *bo); -+ -+#endif /* __PANTHOR_GEM_H__ */ --- -2.42.0 - - -From 39a67ce4c2d117c0caf700b38b8528e261284491 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:37 +0100 -Subject: [PATCH 56/81] drm/panthor: Add the devfreq logical block -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Every thing related to devfreq in placed in panthor_devfreq.c, and -helpers that can be called by other logical blocks are exposed through -panthor_devfreq.h. - -This implementation is loosely based on the panfrost implementation, -the only difference being that we don't count device users, because -the idle/active state will be managed by the scheduler logic. - -v4: -- Add Clément's A-b for the relicensing - -v3: -- Add acks for the MIT/GPL2 relicensing - -v2: -- Added in v2 - -Cc: Clément Péron # MIT+GPL2 relicensing -Reviewed-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Acked-by: Clément Péron # MIT+GPL2 relicensing -Link: https://lore.kernel.org/r/20240122163047.1954733-7-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_devfreq.c | 283 ++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_devfreq.h | 25 ++ - 2 files changed, 308 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.c - create mode 100644 drivers/gpu/drm/panthor/panthor_devfreq.h - -diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c -new file mode 100644 -index 000000000000..dd28b15337d4 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_devfreq.c -@@ -0,0 +1,283 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2019 Collabora ltd. 
*/ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#include "panthor_device.h" -+#include "panthor_devfreq.h" -+ -+/** -+ * struct panthor_devfreq - Device frequency management -+ */ -+struct panthor_devfreq { -+ /** @devfreq: devfreq device. */ -+ struct devfreq *devfreq; -+ -+ /** @gov_data: Governor data. */ -+ struct devfreq_simple_ondemand_data gov_data; -+ -+ /** @busy_time: Busy time. */ -+ ktime_t busy_time; -+ -+ /** @idle_time: Idle time. */ -+ ktime_t idle_time; -+ -+ /** @time_last_update: Last update time. */ -+ ktime_t time_last_update; -+ -+ /** @last_busy_state: True if the GPU was busy last time we updated the state. */ -+ bool last_busy_state; -+ -+ /* -+ * @lock: Lock used to protect busy_time, idle_time, time_last_update and -+ * last_busy_state. -+ * -+ * These fields can be accessed concurrently by panthor_devfreq_get_dev_status() -+ * and panthor_devfreq_record_{busy,idle}(). -+ */ -+ spinlock_t lock; -+}; -+ -+static void panthor_devfreq_update_utilization(struct panthor_devfreq *pdevfreq) -+{ -+ ktime_t now, last; -+ -+ now = ktime_get(); -+ last = pdevfreq->time_last_update; -+ -+ if (pdevfreq->last_busy_state) -+ pdevfreq->busy_time += ktime_sub(now, last); -+ else -+ pdevfreq->idle_time += ktime_sub(now, last); -+ -+ pdevfreq->time_last_update = now; -+} -+ -+static int panthor_devfreq_target(struct device *dev, unsigned long *freq, -+ u32 flags) -+{ -+ struct dev_pm_opp *opp; -+ -+ opp = devfreq_recommended_opp(dev, freq, flags); -+ if (IS_ERR(opp)) -+ return PTR_ERR(opp); -+ dev_pm_opp_put(opp); -+ -+ return dev_pm_opp_set_rate(dev, *freq); -+} -+ -+static void panthor_devfreq_reset(struct panthor_devfreq *pdevfreq) -+{ -+ pdevfreq->busy_time = 0; -+ pdevfreq->idle_time = 0; -+ pdevfreq->time_last_update = ktime_get(); -+} -+ -+static int panthor_devfreq_get_dev_status(struct device *dev, -+ struct devfreq_dev_status *status) -+{ -+ struct panthor_device *ptdev = dev_get_drvdata(dev); -+ struct panthor_devfreq *pdevfreq = ptdev->devfreq; -+ unsigned long irqflags; -+ -+ status->current_frequency = clk_get_rate(ptdev->clks.core); -+ -+ spin_lock_irqsave(&pdevfreq->lock, irqflags); -+ -+ panthor_devfreq_update_utilization(pdevfreq); -+ -+ status->total_time = ktime_to_ns(ktime_add(pdevfreq->busy_time, -+ pdevfreq->idle_time)); -+ -+ status->busy_time = ktime_to_ns(pdevfreq->busy_time); -+ -+ panthor_devfreq_reset(pdevfreq); -+ -+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); -+ -+ drm_dbg(&ptdev->base, "busy %lu total %lu %lu %% freq %lu MHz\n", -+ status->busy_time, status->total_time, -+ status->busy_time / (status->total_time / 100), -+ status->current_frequency / 1000 / 1000); -+ -+ return 0; -+} -+ -+static struct devfreq_dev_profile panthor_devfreq_profile = { -+ .timer = DEVFREQ_TIMER_DELAYED, -+ .polling_ms = 50, /* ~3 frames */ -+ .target = panthor_devfreq_target, -+ .get_dev_status = panthor_devfreq_get_dev_status, -+}; -+ -+int panthor_devfreq_init(struct panthor_device *ptdev) -+{ -+ /* There's actually 2 regulators (mali and sram), but the OPP core only -+ * supports one. -+ * -+ * We assume the sram regulator is coupled with the mali one and let -+ * the coupling logic deal with voltage updates. 
-+ */ -+ static const char * const reg_names[] = { "mali", NULL }; -+ struct thermal_cooling_device *cooling; -+ struct device *dev = ptdev->base.dev; -+ struct panthor_devfreq *pdevfreq; -+ struct dev_pm_opp *opp; -+ unsigned long cur_freq; -+ int ret; -+ -+ pdevfreq = drmm_kzalloc(&ptdev->base, sizeof(*ptdev->devfreq), GFP_KERNEL); -+ if (!pdevfreq) -+ return -ENOMEM; -+ -+ ptdev->devfreq = pdevfreq; -+ -+ ret = devm_pm_opp_set_regulators(dev, reg_names); -+ if (ret) { -+ if (ret != -EPROBE_DEFER) -+ DRM_DEV_ERROR(dev, "Couldn't set OPP regulators\n"); -+ -+ return ret; -+ } -+ -+ ret = devm_pm_opp_of_add_table(dev); -+ if (ret) -+ return ret; -+ -+ spin_lock_init(&pdevfreq->lock); -+ -+ panthor_devfreq_reset(pdevfreq); -+ -+ cur_freq = clk_get_rate(ptdev->clks.core); -+ -+ opp = devfreq_recommended_opp(dev, &cur_freq, 0); -+ if (IS_ERR(opp)) -+ return PTR_ERR(opp); -+ -+ panthor_devfreq_profile.initial_freq = cur_freq; -+ -+ /* Regulator coupling only takes care of synchronizing/balancing voltage -+ * updates, but the coupled regulator needs to be enabled manually. -+ * -+ * We use devm_regulator_get_enable_optional() and keep the sram supply -+ * enabled until the device is removed, just like we do for the mali -+ * supply, which is enabled when dev_pm_opp_set_opp(dev, opp) is called, -+ * and disabled when the opp_table is torn down, using the devm action. -+ * -+ * If we really care about disabling regulators on suspend, we should: -+ * - use devm_regulator_get_optional() here -+ * - call dev_pm_opp_set_opp(dev, NULL) before leaving this function -+ * (this disables the regulator passed to the OPP layer) -+ * - call dev_pm_opp_set_opp(dev, NULL) and -+ * regulator_disable(ptdev->regulators.sram) in -+ * panthor_devfreq_suspend() -+ * - call dev_pm_opp_set_opp(dev, default_opp) and -+ * regulator_enable(ptdev->regulators.sram) in -+ * panthor_devfreq_resume() -+ * -+ * But without knowing if it's beneficial or not (in term of power -+ * consumption), or how much it slows down the suspend/resume steps, -+ * let's just keep regulators enabled for the device lifetime. -+ */ -+ ret = devm_regulator_get_enable_optional(dev, "sram"); -+ if (ret && ret != -ENODEV) { -+ if (ret != -EPROBE_DEFER) -+ DRM_DEV_ERROR(dev, "Couldn't retrieve/enable sram supply\n"); -+ return ret; -+ } -+ -+ /* -+ * Set the recommend OPP this will enable and configure the regulator -+ * if any and will avoid a switch off by regulator_late_cleanup() -+ */ -+ ret = dev_pm_opp_set_opp(dev, opp); -+ if (ret) { -+ DRM_DEV_ERROR(dev, "Couldn't set recommended OPP\n"); -+ return ret; -+ } -+ -+ dev_pm_opp_put(opp); -+ -+ /* -+ * Setup default thresholds for the simple_ondemand governor. -+ * The values are chosen based on experiments. 
-+ */ -+ pdevfreq->gov_data.upthreshold = 45; -+ pdevfreq->gov_data.downdifferential = 5; -+ -+ pdevfreq->devfreq = devm_devfreq_add_device(dev, &panthor_devfreq_profile, -+ DEVFREQ_GOV_SIMPLE_ONDEMAND, -+ &pdevfreq->gov_data); -+ if (IS_ERR(pdevfreq->devfreq)) { -+ DRM_DEV_ERROR(dev, "Couldn't initialize GPU devfreq\n"); -+ ret = PTR_ERR(pdevfreq->devfreq); -+ pdevfreq->devfreq = NULL; -+ return ret; -+ } -+ -+ cooling = devfreq_cooling_em_register(pdevfreq->devfreq, NULL); -+ if (IS_ERR(cooling)) -+ DRM_DEV_INFO(dev, "Failed to register cooling device\n"); -+ -+ return 0; -+} -+ -+int panthor_devfreq_resume(struct panthor_device *ptdev) -+{ -+ struct panthor_devfreq *pdevfreq = ptdev->devfreq; -+ -+ if (!pdevfreq->devfreq) -+ return 0; -+ -+ panthor_devfreq_reset(pdevfreq); -+ -+ return devfreq_resume_device(pdevfreq->devfreq); -+} -+ -+int panthor_devfreq_suspend(struct panthor_device *ptdev) -+{ -+ struct panthor_devfreq *pdevfreq = ptdev->devfreq; -+ -+ if (!pdevfreq->devfreq) -+ return 0; -+ -+ return devfreq_suspend_device(pdevfreq->devfreq); -+} -+ -+void panthor_devfreq_record_busy(struct panthor_device *ptdev) -+{ -+ struct panthor_devfreq *pdevfreq = ptdev->devfreq; -+ unsigned long irqflags; -+ -+ if (!pdevfreq->devfreq) -+ return; -+ -+ spin_lock_irqsave(&pdevfreq->lock, irqflags); -+ -+ panthor_devfreq_update_utilization(pdevfreq); -+ pdevfreq->last_busy_state = true; -+ -+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); -+} -+ -+void panthor_devfreq_record_idle(struct panthor_device *ptdev) -+{ -+ struct panthor_devfreq *pdevfreq = ptdev->devfreq; -+ unsigned long irqflags; -+ -+ if (!pdevfreq->devfreq) -+ return; -+ -+ spin_lock_irqsave(&pdevfreq->lock, irqflags); -+ -+ panthor_devfreq_update_utilization(pdevfreq); -+ pdevfreq->last_busy_state = false; -+ -+ spin_unlock_irqrestore(&pdevfreq->lock, irqflags); -+} -diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h -new file mode 100644 -index 000000000000..875fbb5a1c1b ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_devfreq.h -@@ -0,0 +1,25 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2019 Collabora ltd. */ -+ -+#ifndef __PANTHOR_DEVFREQ_H__ -+#define __PANTHOR_DEVFREQ_H__ -+ -+#include -+#include -+#include -+ -+struct devfreq; -+struct thermal_cooling_device; -+ -+struct panthor_device; -+struct panthor_devfreq; -+ -+int panthor_devfreq_init(struct panthor_device *ptdev); -+ -+int panthor_devfreq_resume(struct panthor_device *ptdev); -+int panthor_devfreq_suspend(struct panthor_device *ptdev); -+ -+void panthor_devfreq_record_busy(struct panthor_device *ptdev); -+void panthor_devfreq_record_idle(struct panthor_device *ptdev); -+ -+#endif /* __PANTHOR_DEVFREQ_H__ */ --- -2.42.0 - - -From bb82dee91cb249bed4f2fa65a02969ffca2f30fe Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:38 +0100 -Subject: [PATCH 57/81] drm/panthor: Add the MMU/VM logical block - -MMU and VM management is related and placed in the same source file. - -Page table updates are delegated to the io-pgtable-arm driver that's in -the iommu subsystem. - -The VM management logic is based on drm_gpuva_mgr, and is assuming the -VA space is mostly managed by the usermode driver, except for a reserved -portion of this VA-space that's used for kernel objects (like the heap -contexts/chunks). - -Both asynchronous and synchronous VM operations are supported, and -internal helpers are exposed to allow other logical blocks to map their -buffers in the GPU VA space. 
- -There's one VM_BIND queue per-VM (meaning the Vulkan driver can only -expose one sparse-binding queue), and this bind queue is managed with -a 1:1 drm_sched_entity:drm_gpu_scheduler, such that each VM gets its own -independent execution queue, avoiding VM operation serialization at the -device level (things are still serialized at the VM level). - -The rest is just implementation details that are hopefully well explained -in the documentation. - -v4: -- Add an helper to return the VM state -- Check drmm_mutex_init() return code -- Remove the VM from the AS reclaim list when panthor_vm_active() is - called -- Count the number of active VM users instead of considering there's - at most one user (several scheduling groups can point to the same - vM) -- Pre-allocate a VMA object for unmap operations (unmaps can trigger - a sm_step_remap() call) -- Check vm->root_page_table instead of vm->pgtbl_ops to detect if - the io-pgtable is trying to allocate the root page table -- Don't memset() the va_node in panthor_vm_alloc_va(), make it a - caller requirement -- Fix the kernel doc in a few places -- Drop the panthor_vm::base offset constraint and modify - panthor_vm_put() to explicitly check for a NULL value -- Fix unbalanced vm_bo refcount in panthor_gpuva_sm_step_remap() -- Drop stale comments about the shared_bos list -- Patch mmu_features::va_bits on 32-bit builds to reflect the - io_pgtable limitation and let the UMD know about it - -v3: -- Add acks for the MIT/GPL2 relicensing -- Propagate MMU faults to the scheduler -- Move pages pinning/unpinning out of the dma_signalling path -- Fix 32-bit support -- Rework the user/kernel VA range calculation -- Make the auto-VA range explicit (auto-VA range doesn't cover the full - kernel-VA range on the MCU VM) -- Let callers of panthor_vm_alloc_va() allocate the drm_mm_node - (embedded in panthor_kernel_bo now) -- Adjust things to match the latest drm_gpuvm changes (extobj tracking, - resv prep and more) -- Drop the per-AS lock and use slots_lock (fixes a race on vm->as.id) -- Set as.id to -1 when reusing an address space from the LRU list -- Drop misleading comment about page faults -- Remove check for irq being assigned in panthor_mmu_unplug() - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Link: https://lore.kernel.org/r/20240122163047.1954733-8-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_mmu.c | 2760 +++++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_mmu.h | 102 + - 2 files changed, 2862 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.c - create mode 100644 drivers/gpu/drm/panthor/panthor_mmu.h - -diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c -new file mode 100644 -index 000000000000..d3ce29cd0662 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_mmu.c -@@ -0,0 +1,2760 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. 
*/ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "panthor_device.h" -+#include "panthor_heap.h" -+#include "panthor_mmu.h" -+#include "panthor_sched.h" -+#include "panthor_gem.h" -+#include "panthor_regs.h" -+ -+#define MAX_AS_SLOTS 32 -+ -+struct panthor_vm; -+ -+/** -+ * struct panthor_as_slot - Address space slot -+ */ -+struct panthor_as_slot { -+ /** @vm: VM bound to this slot. NULL is no VM is bound. */ -+ struct panthor_vm *vm; -+}; -+ -+/** -+ * struct panthor_mmu - MMU related data -+ */ -+struct panthor_mmu { -+ /** @irq: The MMU irq. */ -+ struct panthor_irq irq; -+ -+ /** @as: Address space related fields. -+ * -+ * The GPU has a limited number of address spaces (AS) slots, forcing -+ * us to re-assign them to re-assign slots on-demand. -+ */ -+ struct { -+ /** @slots_lock: Lock protecting access to all other AS fields. */ -+ struct mutex slots_lock; -+ -+ /** @alloc_mask: Bitmask encoding the allocated slots. */ -+ unsigned long alloc_mask; -+ -+ /** @faulty_mask: Bitmask encoding the faulty slots. */ -+ unsigned long faulty_mask; -+ -+ /** @slots: VMs currently bound to the AS slots. */ -+ struct panthor_as_slot slots[MAX_AS_SLOTS]; -+ -+ /** -+ * @lru_list: List of least recently used VMs. -+ * -+ * We use this list to pick a VM to evict when all slots are -+ * used. -+ * -+ * There should be no more active VMs than there are AS slots, -+ * so this LRU is just here to keep VMs bound until there's -+ * a need to release a slot, thus avoid unnecessary TLB/cache -+ * flushes. -+ */ -+ struct list_head lru_list; -+ } as; -+ -+ /** @vm: VMs management fields */ -+ struct { -+ /** @lock: Lock protecting access to list. */ -+ struct mutex lock; -+ -+ /** @list: List containing all VMs. */ -+ struct list_head list; -+ -+ /** @reset_in_progress: True if a reset is in progress. */ -+ bool reset_in_progress; -+ -+ /** @wq: Workqueue used for the VM_BIND queues. */ -+ struct workqueue_struct *wq; -+ } vm; -+}; -+ -+/** -+ * struct panthor_vm_pool - VM pool object -+ */ -+struct panthor_vm_pool { -+ /** @xa: Array used for VM handle tracking. */ -+ struct xarray xa; -+}; -+ -+/** -+ * struct panthor_vma - GPU mapping object -+ * -+ * This is used to track GEM mappings in GPU space. -+ */ -+struct panthor_vma { -+ /** @base: Inherits from drm_gpuva. */ -+ struct drm_gpuva base; -+ -+ /** @node: Used to implement deferred release of VMAs. */ -+ struct list_head node; -+ -+ /** -+ * @flags: Combination of drm_panthor_vm_bind_op_flags. -+ * -+ * Only map related flags are accepted. -+ */ -+ u32 flags; -+}; -+ -+/** -+ * struct panthor_vm_op_ctx - VM operation context -+ * -+ * With VM operations potentially taking place in a dma-signaling path, we -+ * need to make sure everything that might require resource allocation is -+ * pre-allocated upfront. This is what this operation context is far. -+ * -+ * We also collect resources that have been freed, so we can release them -+ * asynchronously, and let the VM_BIND scheduler process the next VM_BIND -+ * request. -+ */ -+struct panthor_vm_op_ctx { -+ /** @rsvd_page_tables: Pages reserved for the MMU page table update. */ -+ struct { -+ /** @count: Number of pages reserved. */ -+ u32 count; -+ -+ /** @ptr: Point to the first unused page in the @pages table. 
*/ -+ u32 ptr; -+ -+ /** -+ * @page: Array of pages that can be used for an MMU page table update. -+ * -+ * After an VM operation, there might be free pages left in this array. -+ * They should be returned to the pt_cache as part of the op_ctx cleanup. -+ */ -+ void **pages; -+ } rsvd_page_tables; -+ -+ /** -+ * @preallocated_vmas: Pre-allocated VMAs to handle the remap case. -+ * -+ * Partial unmap requests or map requests overlapping existing mappings will -+ * trigger a remap call, which need to register up to three panthor_vma objects -+ * (one for the new mapping, and two for the previous and next mappings). -+ */ -+ struct panthor_vma *preallocated_vmas[3]; -+ -+ /** @flags: Combination of drm_panthor_vm_bind_op_flags. */ -+ u32 flags; -+ -+ /** @va: Virtual range targeted by the VM operation. */ -+ struct { -+ /** @addr: Start address. */ -+ u64 addr; -+ -+ /** @range: Range size. */ -+ u64 range; -+ } va; -+ -+ /** -+ * @returned_vmas: List of panthor_vma objects returned after a VM operation. -+ * -+ * For unmap operations, this will contain all VMAs that were covered by the -+ * specified VA range. -+ * -+ * For map operations, this will contain all VMAs that previously mapped to -+ * the specified VA range. -+ * -+ * Those VMAs, and the resources they point to will be released as part of -+ * the op_ctx cleanup operation. -+ */ -+ struct list_head returned_vmas; -+ -+ /** @map: Fields specific to a map operation. */ -+ struct { -+ /** @vm_bo: Buffer object to map. */ -+ struct drm_gpuvm_bo *vm_bo; -+ -+ /** @bo_offset: Offset in the buffer object. */ -+ u64 bo_offset; -+ -+ /** -+ * @sgt: sg-table pointing to pages backing the GEM object. -+ * -+ * This is gathered at job creation time, such that we don't have -+ * to allocate in ::run_job(). -+ */ -+ struct sg_table *sgt; -+ -+ /** -+ * @new_vma: The new VMA object that will be inserted to the VA tree. -+ */ -+ struct panthor_vma *new_vma; -+ } map; -+}; -+ -+/** -+ * struct panthor_vm - VM object -+ * -+ * A VM is an object representing a GPU (or MCU) virtual address space. -+ * It embeds the MMU page table for this address space, a tree containing -+ * all the virtual mappings of GEM objects, and other things needed to manage -+ * the VM. -+ * -+ * Except for the MCU VM, which is managed by the kernel, all other VMs are -+ * created by userspace and mostly managed by userspace, using the -+ * %DRM_IOCTL_PANTHOR_VM_BIND ioctl. -+ * -+ * A portion of the virtual address space is reserved for kernel objects, -+ * like heap chunks, and userspace gets to decide how much of the virtual -+ * address space is left to the kernel (half of the virtual address space -+ * by default). -+ */ -+struct panthor_vm { -+ /** -+ * @base: Inherit from drm_gpuvm. -+ * -+ * We delegate all the VA management to the common drm_gpuvm framework -+ * and only implement hooks to update the MMU page table. -+ */ -+ struct drm_gpuvm base; -+ -+ /** -+ * @sched: Scheduler used for asynchronous VM_BIND request. -+ * -+ * We use a 1:1 scheduler here. -+ */ -+ struct drm_gpu_scheduler sched; -+ -+ /** -+ * @entity: Scheduling entity representing the VM_BIND queue. -+ * -+ * There's currently one bind queue per VM. It doesn't make sense to -+ * allow more given the VM operations are serialized anyway. -+ */ -+ struct drm_sched_entity entity; -+ -+ /** @ptdev: Device. */ -+ struct panthor_device *ptdev; -+ -+ /** @memattr: Value to program to the AS_MEMATTR register. */ -+ u64 memattr; -+ -+ /** @pgtbl_ops: Page table operations. 
*/ -+ struct io_pgtable_ops *pgtbl_ops; -+ -+ /** @root_page_table: Stores the root page table pointer. */ -+ void *root_page_table; -+ -+ /** -+ * @op_lock: Lock used to serialize operations on a VM. -+ * -+ * The serialization of jobs queued to the VM_BIND queue is already -+ * taken care of by drm_sched, but we need to serialize synchronous -+ * and asynchronous VM_BIND request. This is what this lock is for. -+ */ -+ struct mutex op_lock; -+ -+ /** -+ * @op_ctx: The context attached to the currently executing VM operation. -+ * -+ * NULL when no operation is in progress. -+ */ -+ struct panthor_vm_op_ctx *op_ctx; -+ -+ /** -+ * @mm: Memory management object representing the auto-VA/kernel-VA. -+ * -+ * Used to auto-allocate VA space for kernel-managed objects (tiler -+ * heaps, ...). -+ * -+ * For the MCU VM, this is managing the VA range that's used to map -+ * all shared interfaces. -+ * -+ * For user VMs, the range is specified by userspace, and must not -+ * exceed half of the VA space addressable. -+ */ -+ struct drm_mm mm; -+ -+ /** @mm_lock: Lock protecting the @mm field. */ -+ struct mutex mm_lock; -+ -+ /** @kernel_auto_va: Automatic VA-range for kernel BOs. */ -+ struct { -+ /** @start: Start of the automatic VA-range for kernel BOs. */ -+ u64 start; -+ -+ /** @size: Size of the automatic VA-range for kernel BOs. */ -+ u64 end; -+ } kernel_auto_va; -+ -+ /** @as: Address space related fields. */ -+ struct { -+ /** -+ * @id: ID of the address space this VM is bound to. -+ * -+ * A value of -1 means the VM is inactive/not bound. -+ */ -+ int id; -+ -+ /** @active_cnt: Number of active users of this VM. */ -+ refcount_t active_cnt; -+ -+ /** -+ * @lru_node: Used to instead the VM in the panthor_mmu::as::lru_list. -+ * -+ * Active VMs should not be inserted in the LRU list. -+ */ -+ struct list_head lru_node; -+ } as; -+ -+ /** -+ * @heaps: Tiler heap related fields. -+ */ -+ struct { -+ /** -+ * @pool: The heap pool attached to this VM. -+ * -+ * Will stay NULL until someone creates a heap context on this VM. -+ */ -+ struct panthor_heap_pool *pool; -+ -+ /** @lock: Lock used to protect access to @pool. */ -+ struct mutex lock; -+ } heaps; -+ -+ /** @node: Used to insert the VM in the panthor_mmu::vm::list. */ -+ struct list_head node; -+ -+ /** @for_mcu: True if this is the MCU VM. */ -+ bool for_mcu; -+ -+ /** -+ * @destroyed: True if the VM was destroyed. -+ * -+ * No further bind requests should be queued to a destroyed VM. -+ */ -+ bool destroyed; -+ -+ /** -+ * @unusable: True if the VM has turned unusable because something -+ * bad happened during an asynchronous request. -+ * -+ * We don't try to recover from such failures, because this implies -+ * informing userspace about the specific operation that failed, and -+ * hoping the userspace driver can replay things from there. This all -+ * sounds very complicated for little gain. -+ * -+ * Instead, we should just flag the VM as unusable, and fail any -+ * further request targeting this VM. -+ * -+ * We also provide a way to query a VM state, so userspace can destroy -+ * it and create a new one. -+ * -+ * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST -+ * situation, where the logical device needs to be re-created. -+ */ -+ bool unusable; -+ -+ /** -+ * @unhandled_fault: Unhandled fault happened. -+ * -+ * This should be reported to the scheduler, and the queue/group be -+ * flagged as faulty as a result. 
-+ */ -+ bool unhandled_fault; -+}; -+ -+/** -+ * struct panthor_vm_bind_job - VM bind job -+ */ -+struct panthor_vm_bind_job { -+ /** @base: Inherit from drm_sched_job. */ -+ struct drm_sched_job base; -+ -+ /** @refcount: Reference count. */ -+ struct kref refcount; -+ -+ /** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */ -+ struct work_struct cleanup_op_ctx_work; -+ -+ /** @vm: VM targeted by the VM operation. */ -+ struct panthor_vm *vm; -+ -+ /** @ctx: Operation context. */ -+ struct panthor_vm_op_ctx ctx; -+}; -+ -+/** -+ * @pt_cache: Cache used to allocate MMU page tables. -+ * -+ * The pre-allocation pattern forces us to over-allocate to plan for -+ * the worst case scenario, and return the pages we didn't use. -+ * -+ * Having a kmem_cache allows us to speed allocations. -+ */ -+static struct kmem_cache *pt_cache; -+ -+/** -+ * alloc_pt() - Custom page table allocator -+ * @cookie: Cookie passed at page table allocation time. -+ * @size: Size of the page table. This size should be fixed, -+ * and determined at creation time based on the granule size. -+ * @gfp: GFP flags. -+ * -+ * We want a custom allocator so we can use a cache for page table -+ * allocations and amortize the cost of the over-reservation that's -+ * done to allow asynchronous VM operations. -+ * -+ * Return: non-NULL on success, NULL if the allocation failed for any -+ * reason. -+ */ -+static void *alloc_pt(void *cookie, size_t size, gfp_t gfp) -+{ -+ struct panthor_vm *vm = cookie; -+ void *page; -+ -+ /* Allocation of the root page table happening during init. */ -+ if (unlikely(!vm->root_page_table)) { -+ struct page *p; -+ -+ drm_WARN_ON(&vm->ptdev->base, vm->op_ctx); -+ p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev), -+ gfp | __GFP_ZERO, get_order(size)); -+ page = p ? page_address(p) : NULL; -+ vm->root_page_table = page; -+ return page; -+ } -+ -+ /* We're not supposed to have anything bigger than 4k here, because we picked a -+ * 4k granule size at init time. -+ */ -+ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) -+ return NULL; -+ -+ /* We must have some op_ctx attached to the VM and it must have at least one -+ * free page. -+ */ -+ if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) || -+ drm_WARN_ON(&vm->ptdev->base, -+ vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count)) -+ return NULL; -+ -+ page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++]; -+ memset(page, 0, SZ_4K); -+ -+ /* Page table entries don't use virtual addresses, which trips out -+ * kmemleak. kmemleak_alloc_phys() might work, but physical addresses -+ * are mixed with other fields, and I fear kmemleak won't detect that -+ * either. -+ * -+ * Let's just ignore memory passed to the page-table driver for now. -+ */ -+ kmemleak_ignore(page); -+ return page; -+} -+ -+/** -+ * @free_pt() - Custom page table free function -+ * @cookie: Cookie passed at page table allocation time. -+ * @data: Page table to free. -+ * @size: Size of the page table. This size should be fixed, -+ * and determined at creation time based on the granule size. -+ */ -+static void free_pt(void *cookie, void *data, size_t size) -+{ -+ struct panthor_vm *vm = cookie; -+ -+ if (unlikely(vm->root_page_table == data)) { -+ free_pages((unsigned long)data, get_order(size)); -+ vm->root_page_table = NULL; -+ return; -+ } -+ -+ if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K)) -+ return; -+ -+ /* Return the page to the pt_cache. 
*/ -+ kmem_cache_free(pt_cache, data); -+} -+ -+static int wait_ready(struct panthor_device *ptdev, u32 as_nr) -+{ -+ int ret; -+ u32 val; -+ -+ /* Wait for the MMU status to indicate there is no active command, in -+ * case one is pending. -+ */ -+ ret = readl_relaxed_poll_timeout_atomic(ptdev->iomem + AS_STATUS(as_nr), -+ val, !(val & AS_STATUS_AS_ACTIVE), -+ 10, 100000); -+ -+ if (ret) { -+ panthor_device_schedule_reset(ptdev); -+ drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n"); -+ } -+ -+ return ret; -+} -+ -+static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd) -+{ -+ int status; -+ -+ /* write AS_COMMAND when MMU is ready to accept another command */ -+ status = wait_ready(ptdev, as_nr); -+ if (!status) -+ gpu_write(ptdev, AS_COMMAND(as_nr), cmd); -+ -+ return status; -+} -+ -+static void lock_region(struct panthor_device *ptdev, u32 as_nr, -+ u64 region_start, u64 size) -+{ -+ u8 region_width; -+ u64 region; -+ u64 region_end = region_start + size; -+ -+ if (!size) -+ return; -+ -+ /* -+ * The locked region is a naturally aligned power of 2 block encoded as -+ * log2 minus(1). -+ * Calculate the desired start/end and look for the highest bit which -+ * differs. The smallest naturally aligned block must include this bit -+ * change, the desired region starts with this bit (and subsequent bits) -+ * zeroed and ends with the bit (and subsequent bits) set to one. -+ */ -+ region_width = max(fls64(region_start ^ (region_end - 1)), -+ const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1; -+ -+ /* -+ * Mask off the low bits of region_start (which would be ignored by -+ * the hardware anyway) -+ */ -+ region_start &= GENMASK_ULL(63, region_width); -+ -+ region = region_width | region_start; -+ -+ /* Lock the region that needs to be updated */ -+ gpu_write(ptdev, AS_LOCKADDR_LO(as_nr), lower_32_bits(region)); -+ gpu_write(ptdev, AS_LOCKADDR_HI(as_nr), upper_32_bits(region)); -+ write_cmd(ptdev, as_nr, AS_COMMAND_LOCK); -+} -+ -+static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr, -+ u64 iova, u64 size, u32 op) -+{ -+ lockdep_assert_held(&ptdev->mmu->as.slots_lock); -+ -+ if (as_nr < 0) -+ return 0; -+ -+ if (op != AS_COMMAND_UNLOCK) -+ lock_region(ptdev, as_nr, iova, size); -+ -+ /* Run the MMU operation */ -+ write_cmd(ptdev, as_nr, op); -+ -+ /* Wait for the flush to complete */ -+ return wait_ready(ptdev, as_nr); -+} -+ -+static int mmu_hw_do_operation(struct panthor_vm *vm, -+ u64 iova, u64 size, u32 op) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ int ret; -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op); -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ return ret; -+} -+ -+static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr, -+ u64 transtab, u64 transcfg, u64 memattr) -+{ -+ int ret; -+ -+ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); -+ if (ret) -+ return ret; -+ -+ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), lower_32_bits(transtab)); -+ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), upper_32_bits(transtab)); -+ -+ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), lower_32_bits(memattr)); -+ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), upper_32_bits(memattr)); -+ -+ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), lower_32_bits(transcfg)); -+ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), upper_32_bits(transcfg)); -+ -+ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); -+} -+ -+static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr) -+{ -+ int ret; 
-+ -+ ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM); -+ if (ret) -+ return ret; -+ -+ gpu_write(ptdev, AS_TRANSTAB_LO(as_nr), 0); -+ gpu_write(ptdev, AS_TRANSTAB_HI(as_nr), 0); -+ -+ gpu_write(ptdev, AS_MEMATTR_LO(as_nr), 0); -+ gpu_write(ptdev, AS_MEMATTR_HI(as_nr), 0); -+ -+ gpu_write(ptdev, AS_TRANSCFG_LO(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED); -+ gpu_write(ptdev, AS_TRANSCFG_HI(as_nr), 0); -+ -+ return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE); -+} -+ -+static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value) -+{ -+ /* Bits 16 to 31 mean REQ_COMPLETE. */ -+ return value & GENMASK(15, 0); -+} -+ -+static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as) -+{ -+ return BIT(as); -+} -+ -+/** -+ * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults -+ * @vm: VM to check. -+ * -+ * Return: true if the VM has unhandled faults, false otherwise. -+ */ -+bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm) -+{ -+ return vm->unhandled_fault; -+} -+ -+/** -+ * panthor_vm_is_unusable() - Check if the VM is still usable -+ * @vm: VM to check. -+ * -+ * Return: true if the VM is unusable, false otherwise. -+ */ -+bool panthor_vm_is_unusable(struct panthor_vm *vm) -+{ -+ return vm->unusable; -+} -+ -+static void panthor_vm_release_as_locked(struct panthor_vm *vm) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ -+ lockdep_assert_held(&ptdev->mmu->as.slots_lock); -+ -+ if (drm_WARN_ON(&ptdev->base, vm->as.id < 0)) -+ return; -+ -+ ptdev->mmu->as.slots[vm->as.id].vm = NULL; -+ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); -+ refcount_set(&vm->as.active_cnt, 0); -+ list_del_init(&vm->as.lru_node); -+ vm->as.id = -1; -+} -+ -+/** -+ * panthor_vm_active() - Flag a VM as active -+ * @VM: VM to flag as active. -+ * -+ * Assigns an address space to a VM so it can be used by the GPU/MCU. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_active(struct panthor_vm *vm) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg; -+ int ret = 0, as, cookie; -+ u64 transtab, transcfg; -+ -+ if (!drm_dev_enter(&ptdev->base, &cookie)) -+ return -ENODEV; -+ -+ if (refcount_inc_not_zero(&vm->as.active_cnt)) -+ goto out_dev_exit; -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ -+ if (refcount_inc_not_zero(&vm->as.active_cnt)) -+ goto out_unlock; -+ -+ as = vm->as.id; -+ if (as >= 0) { -+ /* Unhandled pagefault on this AS, the MMU was disabled. We need to -+ * re-enable the MMU after clearing+unmasking the AS interrupts. 
-+ */ -+ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) -+ goto out_enable_as; -+ -+ goto out_make_active; -+ } -+ -+ /* Check for a free AS */ -+ if (vm->for_mcu) { -+ drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0)); -+ as = 0; -+ } else { -+ as = ffz(ptdev->mmu->as.alloc_mask | BIT(0)); -+ } -+ -+ if (!(BIT(as) & ptdev->gpu_info.as_present)) { -+ struct panthor_vm *lru_vm; -+ -+ lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list, -+ struct panthor_vm, -+ as.lru_node); -+ if (drm_WARN_ON(&ptdev->base, !lru_vm)) { -+ ret = -EBUSY; -+ goto out_unlock; -+ } -+ -+ drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt)); -+ as = lru_vm->as.id; -+ panthor_vm_release_as_locked(lru_vm); -+ } -+ -+ /* Assign the free or reclaimed AS to the FD */ -+ vm->as.id = as; -+ set_bit(as, &ptdev->mmu->as.alloc_mask); -+ ptdev->mmu->as.slots[as].vm = vm; -+ -+out_enable_as: -+ transtab = cfg->arm_lpae_s1_cfg.ttbr; -+ transcfg = AS_TRANSCFG_PTW_MEMATTR_WB | -+ AS_TRANSCFG_PTW_RA | -+ AS_TRANSCFG_ADRMODE_AARCH64_4K; -+ if (ptdev->coherent) -+ transcfg |= AS_TRANSCFG_PTW_SH_OS; -+ -+ /* If the VM is re-activated, we clear the fault. */ -+ vm->unhandled_fault = false; -+ -+ /* Unhandled pagefault on this AS, clear the fault and re-enable interrupts -+ * before enabling the AS. -+ */ -+ if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) { -+ gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as)); -+ ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as); -+ gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask); -+ } -+ -+ ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr); -+ -+out_make_active: -+ if (!ret) { -+ refcount_set(&vm->as.active_cnt, 1); -+ list_del_init(&vm->as.lru_node); -+ } -+ -+out_unlock: -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+out_dev_exit: -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+/** -+ * panthor_vm_idle() - Flag a VM idle -+ * @VM: VM to flag as idle. -+ * -+ * When we know the GPU is done with the VM (no more jobs to process), -+ * we can relinquish the AS slot attached to this VM, if any. -+ * -+ * We don't release the slot immediately, but instead place the VM in -+ * the LRU list, so it can be evicted if another VM needs an AS slot. -+ * This way, VMs keep attached to the AS they were given until we run -+ * out of free slot, limiting the number of MMU operations (TLB flush -+ * and other AS updates). -+ */ -+void panthor_vm_idle(struct panthor_vm *vm) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ -+ if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock)) -+ return; -+ -+ if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node))) -+ list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list); -+ -+ refcount_set(&vm->as.active_cnt, 0); -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+} -+ -+static void panthor_vm_stop(struct panthor_vm *vm) -+{ -+ drm_sched_stop(&vm->sched, NULL); -+} -+ -+static void panthor_vm_start(struct panthor_vm *vm) -+{ -+ drm_sched_start(&vm->sched, true); -+} -+ -+/** -+ * panthor_vm_as() - Get the AS slot attached to a VM -+ * @vm: VM to get the AS slot of. -+ * -+ * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise. 
-+ */ -+int panthor_vm_as(struct panthor_vm *vm) -+{ -+ return vm->as.id; -+} -+ -+static size_t get_pgsize(u64 addr, size_t size, size_t *count) -+{ -+ /* -+ * io-pgtable only operates on multiple pages within a single table -+ * entry, so we need to split at boundaries of the table size, i.e. -+ * the next block size up. The distance from address A to the next -+ * boundary of block size B is logically B - A % B, but in unsigned -+ * two's complement where B is a power of two we get the equivalence -+ * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :) -+ */ -+ size_t blk_offset = -addr % SZ_2M; -+ -+ if (blk_offset || size < SZ_2M) { -+ *count = min_not_zero(blk_offset, size) / SZ_4K; -+ return SZ_4K; -+ } -+ blk_offset = -addr % SZ_1G ?: SZ_1G; -+ *count = min(blk_offset, size) / SZ_2M; -+ return SZ_2M; -+} -+ -+static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ int ret = 0, cookie; -+ -+ if (vm->as.id < 0) -+ return 0; -+ -+ /* If the device is unplugged, we just silently skip the flush. */ -+ if (!drm_dev_enter(&ptdev->base, &cookie)) -+ return 0; -+ -+ /* Flush the PTs only if we're already awake */ -+ if (pm_runtime_active(ptdev->base.dev)) -+ ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT); -+ -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ struct io_pgtable_ops *ops = vm->pgtbl_ops; -+ u64 offset = 0; -+ -+ drm_dbg(&ptdev->base, "unmap: as=%d, iova=%llx, len=%llx", vm->as.id, iova, size); -+ -+ while (offset < size) { -+ size_t unmapped_sz = 0, pgcount; -+ size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount); -+ -+ unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL); -+ -+ if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) { -+ drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n", -+ iova + offset + unmapped_sz, -+ iova + offset + pgsize * pgcount, -+ iova, iova + size); -+ panthor_vm_flush_range(vm, iova, offset + unmapped_sz); -+ return -EINVAL; -+ } -+ offset += unmapped_sz; -+ } -+ -+ return panthor_vm_flush_range(vm, iova, size); -+} -+ -+static int -+panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot, -+ struct sg_table *sgt, u64 offset, u64 size) -+{ -+ struct panthor_device *ptdev = vm->ptdev; -+ unsigned int count; -+ struct scatterlist *sgl; -+ struct io_pgtable_ops *ops = vm->pgtbl_ops; -+ u64 start_iova = iova; -+ int ret; -+ -+ if (!size) -+ return 0; -+ -+ for_each_sgtable_dma_sg(sgt, sgl, count) { -+ dma_addr_t paddr = sg_dma_address(sgl); -+ size_t len = sg_dma_len(sgl); -+ -+ if (len <= offset) { -+ offset -= len; -+ continue; -+ } -+ -+ paddr -= offset; -+ len -= offset; -+ -+ if (size >= 0) { -+ len = min_t(size_t, len, size); -+ size -= len; -+ } -+ -+ drm_dbg(&ptdev->base, "map: as=%d, iova=%llx, paddr=%pad, len=%zx", -+ vm->as.id, iova, &paddr, len); -+ -+ while (len) { -+ size_t pgcount, mapped = 0; -+ size_t pgsize = get_pgsize(iova | paddr, len, &pgcount); -+ -+ ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, -+ GFP_KERNEL, &mapped); -+ iova += mapped; -+ paddr += mapped; -+ len -= mapped; -+ -+ if (drm_WARN_ON(&ptdev->base, !ret && !mapped)) -+ ret = -ENOMEM; -+ -+ if (ret) { -+ /* If something failed, unmap what we've already mapped before -+ * returning. The unmap call is not supposed to fail. 
-+ */ -+ drm_WARN_ON(&ptdev->base, -+ panthor_vm_unmap_pages(vm, start_iova, -+ iova - start_iova)); -+ return ret; -+ } -+ } -+ -+ if (!size) -+ break; -+ } -+ -+ return panthor_vm_flush_range(vm, start_iova, iova - start_iova); -+} -+ -+static int flags_to_prot(u32 flags) -+{ -+ int prot = 0; -+ -+ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC) -+ prot |= IOMMU_NOEXEC; -+ -+ if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED)) -+ prot |= IOMMU_CACHE; -+ -+ if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY) -+ prot |= IOMMU_READ; -+ else -+ prot |= IOMMU_READ | IOMMU_WRITE; -+ -+ return prot; -+} -+ -+/** -+ * panthor_vm_alloc_va() - Allocate a region in the auto-va space -+ * @VM: VM to allocate a region on. -+ * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user -+ * wants the VA to be automatically allocated from the auto-VA range. -+ * @size: size of the VA range. -+ * @va_node: drm_mm_node to initialize. Must be zero-initialized. -+ * -+ * Some GPU objects, like heap chunks, are fully managed by the kernel and -+ * need to be mapped to the userspace VM, in the region reserved for kernel -+ * objects. -+ * -+ * This function takes care of allocating a region in the kernel auto-VA space. -+ * -+ * Return: 0 on success, an error code otherwise. -+ */ -+int -+panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, -+ struct drm_mm_node *va_node) -+{ -+ int ret; -+ -+ if (!size || (size & ~PAGE_MASK)) -+ return -EINVAL; -+ -+ if (va != PANTHOR_VM_KERNEL_AUTO_VA && (va & ~PAGE_MASK)) -+ return -EINVAL; -+ -+ mutex_lock(&vm->mm_lock); -+ if (va != PANTHOR_VM_KERNEL_AUTO_VA) { -+ va_node->start = va; -+ va_node->size = size; -+ ret = drm_mm_reserve_node(&vm->mm, va_node); -+ } else { -+ ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size, -+ size >= SZ_2M ? SZ_2M : SZ_4K, -+ 0, vm->kernel_auto_va.start, -+ vm->kernel_auto_va.end, -+ DRM_MM_INSERT_BEST); -+ } -+ mutex_unlock(&vm->mm_lock); -+ -+ return ret; -+} -+ -+/** -+ * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va() -+ * @VM: VM to free the region on. -+ * @va_node: Memory node representing the region to free. -+ */ -+void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node) -+{ -+ mutex_lock(&vm->mm_lock); -+ drm_mm_remove_node(va_node); -+ mutex_unlock(&vm->mm_lock); -+} -+ -+static void panthor_vm_bo_put(struct drm_gpuvm_bo *vm_bo) -+{ -+ struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj); -+ bool unpin; -+ -+ /* We must retain the GEM before calling drm_gpuvm_bo_put(), -+ * otherwise the mutex might be destroyed while we hold it. -+ */ -+ drm_gem_object_get(&bo->base.base); -+ mutex_lock(&bo->gpuva_list_lock); -+ unpin = drm_gpuvm_bo_put(vm_bo); -+ mutex_unlock(&bo->gpuva_list_lock); -+ -+ /* If the vm_bo object was destroyed, release the pin reference that -+ * was hold by this object. 
-+ */ -+ if (unpin && !bo->base.base.import_attach) -+ drm_gem_shmem_unpin(&bo->base); -+ -+ drm_gem_object_put(&bo->base.base); -+} -+ -+static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx, -+ struct panthor_vm *vm) -+{ -+ struct panthor_vma *vma, *tmp_vma; -+ -+ u32 remaining_pt_count = op_ctx->rsvd_page_tables.count - -+ op_ctx->rsvd_page_tables.ptr; -+ -+ if (remaining_pt_count) { -+ kmem_cache_free_bulk(pt_cache, remaining_pt_count, -+ op_ctx->rsvd_page_tables.pages + -+ op_ctx->rsvd_page_tables.ptr); -+ } -+ -+ kfree(op_ctx->rsvd_page_tables.pages); -+ memset(&op_ctx->rsvd_page_tables, 0, sizeof(op_ctx->rsvd_page_tables)); -+ -+ if (op_ctx->map.vm_bo) -+ panthor_vm_bo_put(op_ctx->map.vm_bo); -+ -+ memset(&op_ctx->map, 0, sizeof(op_ctx->map)); -+ -+ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { -+ kfree(op_ctx->preallocated_vmas[i]); -+ op_ctx->preallocated_vmas[i] = NULL; -+ } -+ -+ list_for_each_entry_safe(vma, tmp_vma, &op_ctx->returned_vmas, node) { -+ list_del(&vma->node); -+ panthor_vm_bo_put(vma->base.vm_bo); -+ kfree(vma); -+ } -+} -+ -+static struct panthor_vma * -+panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx) -+{ -+ for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) { -+ struct panthor_vma *vma = op_ctx->preallocated_vmas[i]; -+ -+ if (vma) { -+ op_ctx->preallocated_vmas[i] = NULL; -+ return vma; -+ } -+ } -+ -+ return NULL; -+} -+ -+static int -+panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx) -+{ -+ u32 vma_count; -+ -+ switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: -+ /* One VMA for the new mapping, and two more VMAs for the remap case -+ * which might contain both a prev and next VA. -+ */ -+ vma_count = 3; -+ break; -+ -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: -+ /* Partial unmaps might trigger a remap with either a prev or a next VA, -+ * but not both. -+ */ -+ vma_count = 1; -+ break; -+ -+ default: -+ return 0; -+ } -+ -+ for (u32 i = 0; i < vma_count; i++) { -+ struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL); -+ -+ if (!vma) -+ return -ENOMEM; -+ -+ op_ctx->preallocated_vmas[i] = vma; -+ } -+ -+ return 0; -+} -+ -+#define PANTHOR_VM_BIND_OP_MAP_FLAGS \ -+ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \ -+ DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) -+ -+static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx, -+ struct panthor_vm *vm, -+ struct panthor_gem_object *bo, -+ u64 offset, -+ u64 size, u64 va, -+ u32 flags) -+{ -+ struct drm_gpuvm_bo *preallocated_vm_bo; -+ struct sg_table *sgt = NULL; -+ u64 pt_count; -+ int ret; -+ -+ if (!bo) -+ return -EINVAL; -+ -+ if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) || -+ (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP) -+ return -EINVAL; -+ -+ /* Make sure the VA and size are aligned and in-bounds. */ -+ if (size > bo->base.base.size || offset > bo->base.base.size - size) -+ return -EINVAL; -+ -+ /* If the BO has an exclusive VM attached, it can't be mapped to other VMs. 
*/ -+ if (bo->exclusive_vm_root_gem && -+ bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm)) -+ return -EINVAL; -+ -+ memset(op_ctx, 0, sizeof(*op_ctx)); -+ INIT_LIST_HEAD(&op_ctx->returned_vmas); -+ op_ctx->flags = flags; -+ op_ctx->va.range = size; -+ op_ctx->va.addr = va; -+ -+ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); -+ if (ret) -+ goto err_cleanup; -+ -+ if (!bo->base.base.import_attach) { -+ /* Pre-reserve the BO pages, so the map operation doesn't have to -+ * allocate. -+ */ -+ ret = drm_gem_shmem_pin(&bo->base); -+ if (ret) -+ goto err_cleanup; -+ } -+ -+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); -+ if (IS_ERR(sgt)) { -+ if (!bo->base.base.import_attach) -+ drm_gem_shmem_unpin(&bo->base); -+ -+ ret = PTR_ERR(sgt); -+ goto err_cleanup; -+ } -+ -+ op_ctx->map.sgt = sgt; -+ -+ preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base); -+ if (!preallocated_vm_bo) { -+ if (!bo->base.base.import_attach) -+ drm_gem_shmem_unpin(&bo->base); -+ -+ ret = -ENOMEM; -+ goto err_cleanup; -+ } -+ -+ mutex_lock(&bo->gpuva_list_lock); -+ op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo); -+ mutex_unlock(&bo->gpuva_list_lock); -+ -+ /* If the a vm_bo for this combination exists, it already -+ * retains a pin ref, and we can release the one we took earlier. -+ * -+ * If our pre-allocated vm_bo is picked, it now retains the pin ref, -+ * which will be released in panthor_vm_bo_put(). -+ */ -+ if (preallocated_vm_bo != op_ctx->map.vm_bo && -+ !bo->base.base.import_attach) -+ drm_gem_shmem_unpin(&bo->base); -+ -+ op_ctx->map.bo_offset = offset; -+ -+ /* L1, L2 and L3 page tables. -+ * We could optimize L3 allocation by iterating over the sgt and merging -+ * 2M contiguous blocks, but it's simpler to over-provision and return -+ * the pages if they're not used. -+ */ -+ pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) + -+ ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) + -+ ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21); -+ -+ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, -+ sizeof(*op_ctx->rsvd_page_tables.pages), -+ GFP_KERNEL); -+ if (!op_ctx->rsvd_page_tables.pages) -+ goto err_cleanup; -+ -+ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, -+ op_ctx->rsvd_page_tables.pages); -+ op_ctx->rsvd_page_tables.count = ret; -+ if (ret != pt_count) { -+ ret = -ENOMEM; -+ goto err_cleanup; -+ } -+ -+ /* Insert BO into the extobj list last, when we know nothing can fail. */ -+ drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo); -+ -+ return 0; -+ -+err_cleanup: -+ panthor_vm_cleanup_op_ctx(op_ctx, vm); -+ return ret; -+} -+ -+static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx, -+ struct panthor_vm *vm, -+ u64 va, u64 size) -+{ -+ u32 pt_count = 0; -+ int ret; -+ -+ memset(op_ctx, 0, sizeof(*op_ctx)); -+ INIT_LIST_HEAD(&op_ctx->returned_vmas); -+ op_ctx->va.range = size; -+ op_ctx->va.addr = va; -+ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP; -+ -+ /* Pre-allocate L3 page tables to account for the split-2M-block -+ * situation on unmap. 
-+ */ -+ if (va != ALIGN(va, SZ_2M)) -+ pt_count++; -+ -+ if (va + size != ALIGN(va + size, SZ_2M) && -+ ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M)) -+ pt_count++; -+ -+ ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx); -+ if (ret) -+ goto err_cleanup; -+ -+ if (pt_count) { -+ op_ctx->rsvd_page_tables.pages = kcalloc(pt_count, -+ sizeof(*op_ctx->rsvd_page_tables.pages), -+ GFP_KERNEL); -+ if (!op_ctx->rsvd_page_tables.pages) -+ goto err_cleanup; -+ -+ ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count, -+ op_ctx->rsvd_page_tables.pages); -+ if (ret != pt_count) { -+ ret = -ENOMEM; -+ goto err_cleanup; -+ } -+ op_ctx->rsvd_page_tables.count = pt_count; -+ } -+ -+ return 0; -+ -+err_cleanup: -+ panthor_vm_cleanup_op_ctx(op_ctx, vm); -+ return ret; -+} -+ -+static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx, -+ struct panthor_vm *vm) -+{ -+ memset(op_ctx, 0, sizeof(*op_ctx)); -+ INIT_LIST_HEAD(&op_ctx->returned_vmas); -+ op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY; -+} -+ -+/** -+ * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address -+ * @vm: VM to look into. -+ * @va: Virtual address to search for. -+ * @bo_offset: Offset of the GEM object mapped at this virtual address. -+ * Only valid on success. -+ * -+ * The object returned by this function might no longer be mapped when the -+ * function returns. It's the caller responsibility to ensure there's no -+ * concurrent map/unmap operations making the returned value invalid, or -+ * make sure it doesn't matter if the object is no longer mapped. -+ * -+ * Return: A valid pointer on success, an ERR_PTR() otherwise. -+ */ -+struct panthor_gem_object * -+panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset) -+{ -+ struct panthor_gem_object *bo = ERR_PTR(-ENOENT); -+ struct drm_gpuva *gpuva; -+ struct panthor_vma *vma; -+ -+ /* Take the VM lock to prevent concurrent map/unmap operations. */ -+ mutex_lock(&vm->op_lock); -+ gpuva = drm_gpuva_find_first(&vm->base, va, 1); -+ vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL; -+ if (vma && vma->base.gem.obj) { -+ drm_gem_object_get(vma->base.gem.obj); -+ bo = to_panthor_bo(vma->base.gem.obj); -+ *bo_offset = vma->base.gem.offset; -+ } -+ mutex_unlock(&vm->op_lock); -+ -+ return bo; -+} -+ -+#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M -+ -+static u64 -+panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args, -+ u64 full_va_range) -+{ -+ u64 user_va_range; -+ -+ /* Make sure we have a minimum amount of VA space for kernel objects. */ -+ if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE) -+ return 0; -+ -+ if (args->user_va_range) { -+ /* Use the user provided value if != 0. */ -+ user_va_range = args->user_va_range; -+ } else if (TASK_SIZE_OF(current) < full_va_range) { -+ /* If the task VM size is smaller than the GPU VA range, pick this -+ * as our default user VA range, so userspace can CPU/GPU map buffers -+ * at the same address. -+ */ -+ user_va_range = TASK_SIZE_OF(current); -+ } else { -+ /* If the GPU VA range is smaller than the task VM size, we -+ * just have to live with the fact we won't be able to map -+ * all buffers at the same GPU/CPU address. -+ * -+ * If the GPU VA range is bigger than 4G (more than 32-bit of -+ * VA), we split the range in two, and assign half of it to -+ * the user and the other half to the kernel, if it's not, we -+ * keep the kernel VA space as small as possible. -+ */ -+ user_va_range = full_va_range > SZ_4G ? 
-+ full_va_range / 2 : -+ full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; -+ } -+ -+ if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range) -+ user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE; -+ -+ return user_va_range; -+} -+ -+#define PANTHOR_VM_CREATE_FLAGS 0 -+ -+static int -+panthor_vm_create_check_args(const struct panthor_device *ptdev, -+ const struct drm_panthor_vm_create *args, -+ u64 *kernel_va_start, u64 *kernel_va_range) -+{ -+ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); -+ u64 full_va_range = 1ull << va_bits; -+ u64 user_va_range; -+ -+ if (args->flags & ~PANTHOR_VM_CREATE_FLAGS) -+ return -EINVAL; -+ -+ user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range); -+ if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range)) -+ return -EINVAL; -+ -+ /* Pick a kernel VA range that's a power of two, to have a clear split. */ -+ *kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range); -+ *kernel_va_start = full_va_range - *kernel_va_range; -+ return 0; -+} -+ -+/* -+ * Only 32 VMs per open file. If that becomes a limiting factor, we can -+ * increase this number. -+ */ -+#define PANTHOR_MAX_VMS_PER_FILE 32 -+ -+/** -+ * panthor_vm_pool_create_vm() - Create a VM -+ * @pool: The VM to create this VM on. -+ * @kernel_va_start: Start of the region reserved for kernel objects. -+ * @kernel_va_range: Size of the region reserved for kernel objects. -+ * -+ * Return: a positive VM ID on success, a negative error code otherwise. -+ */ -+int panthor_vm_pool_create_vm(struct panthor_device *ptdev, -+ struct panthor_vm_pool *pool, -+ struct drm_panthor_vm_create *args) -+{ -+ u64 kernel_va_start, kernel_va_range; -+ struct panthor_vm *vm; -+ int ret; -+ u32 id; -+ -+ ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range); -+ if (ret) -+ return ret; -+ -+ vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range, -+ kernel_va_start, kernel_va_range); -+ if (IS_ERR(vm)) -+ return PTR_ERR(vm); -+ -+ ret = xa_alloc(&pool->xa, &id, vm, -+ XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL); -+ -+ if (ret) { -+ panthor_vm_put(vm); -+ return ret; -+ } -+ -+ args->user_va_range = kernel_va_start; -+ return id; -+} -+ -+static void panthor_vm_destroy(struct panthor_vm *vm) -+{ -+ if (!vm) -+ return; -+ -+ vm->destroyed = true; -+ -+ mutex_lock(&vm->heaps.lock); -+ panthor_heap_pool_destroy(vm->heaps.pool); -+ vm->heaps.pool = NULL; -+ mutex_unlock(&vm->heaps.lock); -+ -+ drm_WARN_ON(&vm->ptdev->base, -+ panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); -+ panthor_vm_put(vm); -+} -+ -+/** -+ * panthor_vm_destroy() - Destroy a VM. -+ * @pool: VM pool. -+ * @handle: VM handle. -+ * -+ * This function doesn't free the VM object or its resources, it just kills -+ * all mappings, and makes sure nothing can be mapped after that point. -+ * -+ * If there was any active jobs at the time this function is called, these -+ * jobs should experience page faults and be killed as a result. -+ * -+ * The VM resources are freed when the last reference on the VM object is -+ * dropped. -+ */ -+int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) -+{ -+ struct panthor_vm *vm; -+ -+ vm = xa_erase(&pool->xa, handle); -+ -+ panthor_vm_destroy(vm); -+ -+ return vm ? 0 : -EINVAL; -+} -+ -+/** -+ * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle -+ * @pool: VM pool to check. -+ * @handle: Handle of the VM to retrieve. 
-+ * -+ * Return: A valid pointer if the VM exists, NULL otherwise. -+ */ -+struct panthor_vm * -+panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) -+{ -+ struct panthor_vm *vm; -+ -+ vm = panthor_vm_get(xa_load(&pool->xa, handle)); -+ -+ return vm; -+} -+ -+/** -+ * panthor_vm_pool_destroy() - Destroy a VM pool. -+ * @pfile: File. -+ * -+ * Destroy all VMs in the pool, and release the pool resources. -+ * -+ * Note that VMs can outlive the pool they were created from if other -+ * objects hold a reference to there VMs. -+ */ -+void panthor_vm_pool_destroy(struct panthor_file *pfile) -+{ -+ struct panthor_vm *vm; -+ unsigned long i; -+ -+ if (!pfile->vms) -+ return; -+ -+ xa_for_each(&pfile->vms->xa, i, vm) -+ panthor_vm_destroy(vm); -+ -+ xa_destroy(&pfile->vms->xa); -+ kfree(pfile->vms); -+} -+ -+/** -+ * panthor_vm_pool_create() - Create a VM pool -+ * @pfile: File. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_pool_create(struct panthor_file *pfile) -+{ -+ pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); -+ if (!pfile->vms) -+ return -ENOMEM; -+ -+ xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1); -+ return 0; -+} -+ -+/* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ -+static void mmu_tlb_flush_all(void *cookie) -+{ -+} -+ -+static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) -+{ -+} -+ -+static const struct iommu_flush_ops mmu_tlb_ops = { -+ .tlb_flush_all = mmu_tlb_flush_all, -+ .tlb_flush_walk = mmu_tlb_flush_walk, -+}; -+ -+static const char *access_type_name(struct panthor_device *ptdev, -+ u32 fault_status) -+{ -+ switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { -+ case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: -+ return "ATOMIC"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_READ: -+ return "READ"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_WRITE: -+ return "WRITE"; -+ case AS_FAULTSTATUS_ACCESS_TYPE_EX: -+ return "EXECUTE"; -+ default: -+ drm_WARN_ON(&ptdev->base, 1); -+ return NULL; -+ } -+} -+ -+static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) -+{ -+ bool has_unhandled_faults = false; -+ -+ status = panthor_mmu_fault_mask(ptdev, status); -+ while (status) { -+ u32 as = ffs(status | (status >> 16)) - 1; -+ u32 mask = panthor_mmu_as_fault_mask(ptdev, as); -+ u32 new_int_mask; -+ u64 addr; -+ u32 fault_status; -+ u32 exception_type; -+ u32 access_type; -+ u32 source_id; -+ -+ fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); -+ addr = gpu_read(ptdev, AS_FAULTADDRESS_LO(as)); -+ addr |= (u64)gpu_read(ptdev, AS_FAULTADDRESS_HI(as)) << 32; -+ -+ /* decode the fault status */ -+ exception_type = fault_status & 0xFF; -+ access_type = (fault_status >> 8) & 0x3; -+ source_id = (fault_status >> 16); -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ -+ ptdev->mmu->as.faulty_mask |= mask; -+ new_int_mask = -+ panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask); -+ -+ /* terminal fault, print info about the fault */ -+ drm_err(&ptdev->base, -+ "Unhandled Page fault in AS%d at VA 0x%016llX\n" -+ "raw fault status: 0x%X\n" -+ "decoded fault status: %s\n" -+ "exception type 0x%X: %s\n" -+ "access type 0x%X: %s\n" -+ "source id 0x%X\n", -+ as, addr, -+ fault_status, -+ (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"), -+ exception_type, panthor_exception_name(ptdev, exception_type), -+ access_type, access_type_name(ptdev, fault_status), -+ source_id); -+ -+ /* Ignore MMU interrupts on this AS until it's been -+ * re-enabled. 
-+ */ -+ ptdev->mmu->irq.mask = new_int_mask; -+ gpu_write(ptdev, MMU_INT_MASK, new_int_mask); -+ -+ if (ptdev->mmu->as.slots[as].vm) -+ ptdev->mmu->as.slots[as].vm->unhandled_fault = true; -+ -+ /* Disable the MMU to kill jobs on this AS. */ -+ panthor_mmu_as_disable(ptdev, as); -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ status &= ~mask; -+ has_unhandled_faults = true; -+ } -+ -+ if (has_unhandled_faults) -+ panthor_sched_report_mmu_fault(ptdev); -+} -+PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); -+ -+/** -+ * panthor_mmu_suspend() - Suspend the MMU logic -+ * @ptdev: Device. -+ * -+ * All we do here is de-assign the AS slots on all active VMs, so things -+ * get flushed to the main memory, and no further access to these VMs are -+ * possible. -+ * -+ * We also suspend the MMU IRQ. -+ */ -+void panthor_mmu_suspend(struct panthor_device *ptdev) -+{ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { -+ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; -+ -+ if (vm) { -+ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); -+ panthor_vm_release_as_locked(vm); -+ } -+ } -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ panthor_mmu_irq_suspend(&ptdev->mmu->irq); -+} -+ -+/** -+ * panthor_mmu_resume() - Resume the MMU logic -+ * @ptdev: Device. -+ * -+ * Resume the IRQ. -+ * -+ * We don't re-enable previously active VMs. We assume other parts of the -+ * driver will call panthor_vm_active() on the VMs they intend to use. -+ */ -+void panthor_mmu_resume(struct panthor_device *ptdev) -+{ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ ptdev->mmu->as.alloc_mask = 0; -+ ptdev->mmu->as.faulty_mask = 0; -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); -+} -+ -+/** -+ * panthor_mmu_pre_reset() - Prepare for a reset -+ * @ptdev: Device. -+ * -+ * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we -+ * don't get asked to do a VM operation while the GPU is down. -+ * -+ * We don't cleanly shutdown the AS slots here, because the reset might -+ * come from an AS_ACTIVE_BIT stuck situation. -+ */ -+void panthor_mmu_pre_reset(struct panthor_device *ptdev) -+{ -+ struct panthor_vm *vm; -+ -+ panthor_mmu_irq_suspend(&ptdev->mmu->irq); -+ -+ mutex_lock(&ptdev->mmu->vm.lock); -+ ptdev->mmu->vm.reset_in_progress = true; -+ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) -+ panthor_vm_stop(vm); -+ mutex_unlock(&ptdev->mmu->vm.lock); -+} -+ -+/** -+ * panthor_mmu_post_reset() - Restore things after a reset -+ * @ptdev: Device. -+ * -+ * Put the MMU logic back in action after a reset. That implies resuming the -+ * IRQ and re-enabling the VM_BIND queues. -+ */ -+void panthor_mmu_post_reset(struct panthor_device *ptdev) -+{ -+ struct panthor_vm *vm; -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ -+ /* Now that the reset is effective, we can assume that none of the -+ * AS slots are setup, and clear the faulty flags too. -+ */ -+ ptdev->mmu->as.alloc_mask = 0; -+ ptdev->mmu->as.faulty_mask = 0; -+ -+ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { -+ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; -+ -+ if (vm) -+ panthor_vm_release_as_locked(vm); -+ } -+ -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0)); -+ -+ /* Restart the VM_BIND queues. 
*/ -+ mutex_lock(&ptdev->mmu->vm.lock); -+ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { -+ panthor_vm_start(vm); -+ } -+ ptdev->mmu->vm.reset_in_progress = false; -+ mutex_unlock(&ptdev->mmu->vm.lock); -+} -+ -+static void panthor_vm_free(struct drm_gpuvm *gpuvm) -+{ -+ struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); -+ struct panthor_device *ptdev = vm->ptdev; -+ -+ mutex_lock(&vm->heaps.lock); -+ if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) -+ panthor_heap_pool_destroy(vm->heaps.pool); -+ mutex_unlock(&vm->heaps.lock); -+ mutex_destroy(&vm->heaps.lock); -+ -+ mutex_lock(&ptdev->mmu->vm.lock); -+ list_del(&vm->node); -+ /* Restore the scheduler state so we can call drm_sched_entity_destroy() -+ * and drm_sched_fini(). If get there, that means we have no job left -+ * and no new jobs can be queued, so we can start the scheduler without -+ * risking interfering with the reset. -+ */ -+ if (ptdev->mmu->vm.reset_in_progress) -+ panthor_vm_start(vm); -+ mutex_unlock(&ptdev->mmu->vm.lock); -+ -+ drm_sched_entity_destroy(&vm->entity); -+ drm_sched_fini(&vm->sched); -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ if (vm->as.id >= 0) { -+ int cookie; -+ -+ if (drm_dev_enter(&ptdev->base, &cookie)) { -+ panthor_mmu_as_disable(ptdev, vm->as.id); -+ drm_dev_exit(cookie); -+ } -+ -+ ptdev->mmu->as.slots[vm->as.id].vm = NULL; -+ clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask); -+ list_del(&vm->as.lru_node); -+ } -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+ -+ free_io_pgtable_ops(vm->pgtbl_ops); -+ -+ drm_mm_takedown(&vm->mm); -+ kfree(vm); -+} -+ -+/** -+ * panthor_vm_put() - Release a reference on a VM -+ * @vm: VM to release the reference on. Can be NULL. -+ */ -+void panthor_vm_put(struct panthor_vm *vm) -+{ -+ drm_gpuvm_put(vm ? &vm->base : NULL); -+} -+ -+/** -+ * panthor_vm_get() - Get a VM reference -+ * @vm: VM to get the reference on. Can be NULL. -+ * -+ * Return: @vm value. -+ */ -+struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) -+{ -+ if (vm) -+ drm_gpuvm_get(&vm->base); -+ -+ return vm; -+} -+ -+/** -+ * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM -+ * @vm: VM to query the heap pool on. -+ * @create: True if the heap pool should be created when it doesn't exist. -+ * -+ * Heap pools are per-VM. This function allows one to retrieve the heap pool -+ * attached to a VM. -+ * -+ * If no heap pool exists yet, and @create is true, we create one. -+ * -+ * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). -+ * -+ * Return: A valid pointer on success, an ERR_PTR() otherwise. -+ */ -+struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) -+{ -+ struct panthor_heap_pool *pool; -+ -+ mutex_lock(&vm->heaps.lock); -+ if (!vm->heaps.pool && create) { -+ if (vm->destroyed) -+ pool = ERR_PTR(-EINVAL); -+ else -+ pool = panthor_heap_pool_create(vm->ptdev, vm); -+ -+ if (!IS_ERR(pool)) -+ vm->heaps.pool = panthor_heap_pool_get(pool); -+ } else { -+ pool = panthor_heap_pool_get(vm->heaps.pool); -+ } -+ mutex_unlock(&vm->heaps.lock); -+ -+ return pool; -+} -+ -+static u64 mair_to_memattr(u64 mair) -+{ -+ u64 memattr = 0; -+ u32 i; -+ -+ for (i = 0; i < 8; i++) { -+ u8 in_attr = mair >> (8 * i), out_attr; -+ u8 outer = in_attr >> 4, inner = in_attr & 0xf; -+ -+ /* For caching to be enabled, inner and outer caching policy -+ * have to be both write-back, if one of them is write-through -+ * or non-cacheable, we just choose non-cacheable. 
Device -+ * memory is also translated to non-cacheable. -+ */ -+ if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { -+ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | -+ AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | -+ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); -+ } else { -+ /* Use SH_CPU_INNER mode so SH_IS, which is used when -+ * IOMMU_CACHE is set, actually maps to the standard -+ * definition of inner-shareable and not Mali's -+ * internal-shareable mode. -+ */ -+ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | -+ AS_MEMATTR_AARCH64_SH_CPU_INNER | -+ AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); -+ } -+ -+ memattr |= (u64)out_attr << (8 * i); -+ } -+ -+ return memattr; -+} -+ -+static void panthor_vma_link(struct panthor_vm *vm, -+ struct panthor_vma *vma, -+ struct drm_gpuvm_bo *vm_bo) -+{ -+ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); -+ -+ mutex_lock(&bo->gpuva_list_lock); -+ drm_gpuva_link(&vma->base, vm_bo); -+ drm_gpuvm_bo_put(vm_bo); -+ mutex_unlock(&bo->gpuva_list_lock); -+} -+ -+static void panthor_vma_unlink(struct panthor_vm *vm, -+ struct panthor_vma *vma) -+{ -+ struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj); -+ struct drm_gpuvm_bo *vm_bo = drm_gpuvm_bo_get(vma->base.vm_bo); -+ -+ mutex_lock(&bo->gpuva_list_lock); -+ drm_gpuva_unlink(&vma->base); -+ mutex_unlock(&bo->gpuva_list_lock); -+ -+ /* drm_gpuva_unlink() release the vm_bo, but we manually retained it -+ * when entering this function, so we can implement deferred VMA -+ * destruction. Re-assign it here. -+ */ -+ vma->base.vm_bo = vm_bo; -+ list_add_tail(&vma->node, &vm->op_ctx->returned_vmas); -+} -+ -+static void panthor_vma_init(struct panthor_vma *vma, u32 flags) -+{ -+ INIT_LIST_HEAD(&vma->node); -+ vma->flags = flags; -+} -+ -+#define PANTHOR_VM_MAP_FLAGS \ -+ (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) -+ -+static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) -+{ -+ struct panthor_vm *vm = priv; -+ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; -+ struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); -+ int ret; -+ -+ if (!vma) -+ return -EINVAL; -+ -+ panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS); -+ -+ ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags), -+ op_ctx->map.sgt, op->map.gem.offset, -+ op->map.va.range); -+ if (ret) -+ return ret; -+ -+ /* Ref owned by the mapping now, clear the obj field so we don't release the -+ * pinning/obj ref behind GPUVA's back. 
-+ */ -+ drm_gpuva_map(&vm->base, &vma->base, &op->map); -+ panthor_vma_link(vm, vma, op_ctx->map.vm_bo); -+ op_ctx->map.vm_bo = NULL; -+ return 0; -+} -+ -+static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, -+ void *priv) -+{ -+ struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); -+ struct panthor_vm *vm = priv; -+ struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; -+ struct panthor_vma *prev_vma = NULL, *next_vma = NULL; -+ u64 unmap_start, unmap_range; -+ int ret; -+ -+ drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range); -+ ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range); -+ if (ret) -+ return ret; -+ -+ if (op->remap.prev) { -+ prev_vma = panthor_vm_op_ctx_get_vma(op_ctx); -+ panthor_vma_init(prev_vma, unmap_vma->flags); -+ } -+ -+ if (op->remap.next) { -+ next_vma = panthor_vm_op_ctx_get_vma(op_ctx); -+ panthor_vma_init(next_vma, unmap_vma->flags); -+ } -+ -+ drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL, -+ next_vma ? &next_vma->base : NULL, -+ &op->remap); -+ -+ if (prev_vma) { -+ /* panthor_vma_link() transfers the vm_bo ownership to -+ * the VMA object. Since the vm_bo we're passing is still -+ * owned by the old mapping which will be released when this -+ * mapping is destroyed, we need to grab a ref here. -+ */ -+ panthor_vma_link(vm, prev_vma, -+ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); -+ } -+ -+ if (next_vma) { -+ panthor_vma_link(vm, next_vma, -+ drm_gpuvm_bo_get(op->remap.unmap->va->vm_bo)); -+ } -+ -+ panthor_vma_unlink(vm, unmap_vma); -+ return 0; -+} -+ -+static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, -+ void *priv) -+{ -+ struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); -+ struct panthor_vm *vm = priv; -+ int ret; -+ -+ ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr, -+ unmap_vma->base.va.range); -+ if (drm_WARN_ON(&vm->ptdev->base, ret)) -+ return ret; -+ -+ drm_gpuva_unmap(&op->unmap); -+ panthor_vma_unlink(vm, unmap_vma); -+ return 0; -+} -+ -+static const struct drm_gpuvm_ops panthor_gpuvm_ops = { -+ .vm_free = panthor_vm_free, -+ .sm_step_map = panthor_gpuva_sm_step_map, -+ .sm_step_remap = panthor_gpuva_sm_step_remap, -+ .sm_step_unmap = panthor_gpuva_sm_step_unmap, -+}; -+ -+/** -+ * panthor_vm_resv() - Get the dma_resv object attached to a VM. -+ * @vm: VM to get the dma_resv of. -+ * -+ * Return: A dma_resv object. 
-+ */ -+struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) -+{ -+ return drm_gpuvm_resv(&vm->base); -+} -+ -+struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) -+{ -+ if (!vm) -+ return NULL; -+ -+ return vm->base.r_obj; -+} -+ -+static int -+panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, -+ bool flag_vm_unusable_on_failure) -+{ -+ u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; -+ int ret; -+ -+ if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) -+ return 0; -+ -+ mutex_lock(&vm->op_lock); -+ vm->op_ctx = op; -+ switch (op_type) { -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: -+ if (vm->unusable) { -+ ret = -EINVAL; -+ break; -+ } -+ -+ ret = drm_gpuvm_sm_map(&vm->base, vm, op->va.addr, op->va.range, -+ op->map.vm_bo->obj, op->map.bo_offset); -+ break; -+ -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: -+ ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range); -+ break; -+ -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ if (ret && flag_vm_unusable_on_failure) -+ vm->unusable = true; -+ -+ vm->op_ctx = NULL; -+ mutex_unlock(&vm->op_lock); -+ -+ return ret; -+} -+ -+static struct dma_fence * -+panthor_vm_bind_run_job(struct drm_sched_job *sched_job) -+{ -+ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); -+ bool cookie; -+ int ret; -+ -+ /* Not only we report an error whose result is propagated to the -+ * drm_sched finished fence, but we also flag the VM as unusable, because -+ * a failure in the async VM_BIND results in an inconsistent state. VM needs -+ * to be destroyed and recreated. -+ */ -+ cookie = dma_fence_begin_signalling(); -+ ret = panthor_vm_exec_op(job->vm, &job->ctx, true); -+ dma_fence_end_signalling(cookie); -+ -+ return ret ? ERR_PTR(ret) : NULL; -+} -+ -+static void panthor_vm_bind_job_release(struct kref *kref) -+{ -+ struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); -+ -+ if (job->base.s_fence) -+ drm_sched_job_cleanup(&job->base); -+ -+ panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); -+ panthor_vm_put(job->vm); -+ kfree(job); -+} -+ -+/** -+ * panthor_vm_bind_job_put() - Release a VM_BIND job reference -+ * @sched_job: Job to release the reference on. -+ */ -+void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) -+{ -+ struct panthor_vm_bind_job *job = -+ container_of(sched_job, struct panthor_vm_bind_job, base); -+ -+ if (sched_job) -+ kref_put(&job->refcount, panthor_vm_bind_job_release); -+} -+ -+static void -+panthor_vm_bind_free_job(struct drm_sched_job *sched_job) -+{ -+ struct panthor_vm_bind_job *job = -+ container_of(sched_job, struct panthor_vm_bind_job, base); -+ -+ drm_sched_job_cleanup(sched_job); -+ -+ /* Do the heavy cleanups asynchronously, so we're out of the -+ * dma-signaling path and can acquire dma-resv locks safely. -+ */ -+ queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work); -+} -+ -+static enum drm_gpu_sched_stat -+panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) -+{ -+ WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!"); -+ return DRM_GPU_SCHED_STAT_NOMINAL; -+} -+ -+static const struct drm_sched_backend_ops panthor_vm_bind_ops = { -+ .run_job = panthor_vm_bind_run_job, -+ .free_job = panthor_vm_bind_free_job, -+ .timedout_job = panthor_vm_bind_timedout_job, -+}; -+ -+/** -+ * panthor_vm_create() - Create a VM -+ * @ptdev: Device. -+ * @for_mcu: True if this is the FW MCU VM. 
-+ * @kernel_va_start: Start of the range reserved for kernel BO mapping. -+ * @kernel_va_size: Size of the range reserved for kernel BO mapping. -+ * @auto_kernel_va_start: Start of the auto-VA kernel range. -+ * @auto_kernel_va_size: Size of the auto-VA kernel range. -+ * -+ * Return: A valid pointer on success, an ERR_PTR() otherwise. -+ */ -+struct panthor_vm * -+panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, -+ u64 kernel_va_start, u64 kernel_va_size, -+ u64 auto_kernel_va_start, u64 auto_kernel_va_size) -+{ -+ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); -+ u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); -+ u64 full_va_range = 1ull << va_bits; -+ struct drm_gem_object *dummy_gem; -+ struct drm_gpu_scheduler *sched; -+ struct io_pgtable_cfg pgtbl_cfg; -+ u64 mair, min_va, va_range; -+ struct panthor_vm *vm; -+ int ret; -+ -+ vm = kzalloc(sizeof(*vm), GFP_KERNEL); -+ if (!vm) -+ return ERR_PTR(-ENOMEM); -+ -+ /* We allocate a dummy GEM for the VM. */ -+ dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base); -+ if (!dummy_gem) { -+ ret = -ENOMEM; -+ goto err_free_vm; -+ } -+ -+ mutex_init(&vm->heaps.lock); -+ vm->for_mcu = for_mcu; -+ vm->ptdev = ptdev; -+ mutex_init(&vm->op_lock); -+ -+ if (for_mcu) { -+ /* CSF MCU is a cortex M7, and can only address 4G */ -+ min_va = 0; -+ va_range = SZ_4G; -+ } else { -+ min_va = 0; -+ va_range = full_va_range; -+ } -+ -+ mutex_init(&vm->mm_lock); -+ drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size); -+ vm->kernel_auto_va.start = auto_kernel_va_start; -+ vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1; -+ -+ INIT_LIST_HEAD(&vm->node); -+ INIT_LIST_HEAD(&vm->as.lru_node); -+ vm->as.id = -1; -+ refcount_set(&vm->as.active_cnt, 0); -+ -+ pgtbl_cfg = (struct io_pgtable_cfg) { -+ .pgsize_bitmap = SZ_4K | SZ_2M, -+ .ias = va_bits, -+ .oas = pa_bits, -+ .coherent_walk = ptdev->coherent, -+ .tlb = &mmu_tlb_ops, -+ .iommu_dev = ptdev->base.dev, -+ .alloc = alloc_pt, -+ .free = free_pt, -+ }; -+ -+ vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm); -+ if (!vm->pgtbl_ops) { -+ ret = -EINVAL; -+ goto err_mm_takedown; -+ } -+ -+ /* Bind operations are synchronous for now, no timeout needed. */ -+ ret = drm_sched_init(&vm->sched, &panthor_vm_bind_ops, ptdev->mmu->vm.wq, -+ 1, 1, 0, -+ MAX_SCHEDULE_TIMEOUT, NULL, NULL, -+ "panthor-vm-bind", ptdev->base.dev); -+ if (ret) -+ goto err_free_io_pgtable; -+ -+ sched = &vm->sched; -+ ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL); -+ if (ret) -+ goto err_sched_fini; -+ -+ mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; -+ vm->memattr = mair_to_memattr(mair); -+ -+ mutex_lock(&ptdev->mmu->vm.lock); -+ list_add_tail(&vm->node, &ptdev->mmu->vm.list); -+ -+ /* If a reset is in progress, stop the scheduler. */ -+ if (ptdev->mmu->vm.reset_in_progress) -+ panthor_vm_stop(vm); -+ mutex_unlock(&ptdev->mmu->vm.lock); -+ -+ /* We intentionally leave the reserved range to zero, because we want kernel VMAs -+ * to be handled the same way user VMAs are. -+ */ -+ drm_gpuvm_init(&vm->base, -+ for_mcu ? 
"panthor-MCU-VM" : "panthor-GPU-VM", -+ 0, &ptdev->base, dummy_gem, min_va, va_range, 0, 0, -+ &panthor_gpuvm_ops); -+ drm_gem_object_put(dummy_gem); -+ return vm; -+ -+err_sched_fini: -+ drm_sched_fini(&vm->sched); -+ -+err_free_io_pgtable: -+ free_io_pgtable_ops(vm->pgtbl_ops); -+ -+err_mm_takedown: -+ drm_mm_takedown(&vm->mm); -+ drm_gem_object_put(dummy_gem); -+ -+err_free_vm: -+ kfree(vm); -+ return ERR_PTR(ret); -+} -+ -+static int -+panthor_vm_bind_prepare_op_ctx(struct drm_file *file, -+ struct panthor_vm *vm, -+ const struct drm_panthor_vm_bind_op *op, -+ struct panthor_vm_op_ctx *op_ctx) -+{ -+ struct drm_gem_object *gem; -+ int ret; -+ -+ /* Aligned on page size. */ -+ if ((op->va | op->size) & ~PAGE_MASK) -+ return -EINVAL; -+ -+ switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: -+ gem = drm_gem_object_lookup(file, op->bo_handle); -+ ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm, -+ gem ? to_panthor_bo(gem) : NULL, -+ op->bo_offset, -+ op->size, -+ op->va, -+ op->flags); -+ drm_gem_object_put(gem); -+ return ret; -+ -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: -+ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) -+ return -EINVAL; -+ -+ if (op->bo_handle || op->bo_offset) -+ return -EINVAL; -+ -+ return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size); -+ -+ case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: -+ if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) -+ return -EINVAL; -+ -+ if (op->bo_handle || op->bo_offset) -+ return -EINVAL; -+ -+ if (op->va || op->size) -+ return -EINVAL; -+ -+ if (!op->syncs.count) -+ return -EINVAL; -+ -+ panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); -+ return 0; -+ -+ default: -+ return -EINVAL; -+ } -+} -+ -+static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) -+{ -+ struct panthor_vm_bind_job *job = -+ container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); -+ -+ panthor_vm_cleanup_op_ctx(&job->ctx, job->vm); -+ panthor_vm_bind_job_put(&job->base); -+} -+ -+/** -+ * panthor_vm_bind_job_create() - Create a VM_BIND job -+ * @file: File. -+ * @vm: VM targeted by the VM_BIND job. -+ * @op: VM operation data. -+ * -+ * Return: A valid pointer on success, an ERR_PTR() otherwise. -+ */ -+struct drm_sched_job * -+panthor_vm_bind_job_create(struct drm_file *file, -+ struct panthor_vm *vm, -+ const struct drm_panthor_vm_bind_op *op) -+{ -+ struct panthor_vm_bind_job *job; -+ int ret; -+ -+ if (!vm) -+ return ERR_PTR(-EINVAL); -+ -+ if (vm->destroyed || vm->unusable) -+ return ERR_PTR(-EINVAL); -+ -+ job = kzalloc(sizeof(*job), GFP_KERNEL); -+ if (!job) -+ return ERR_PTR(-ENOMEM); -+ -+ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx); -+ if (ret) { -+ kfree(job); -+ return ERR_PTR(ret); -+ } -+ -+ INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); -+ kref_init(&job->refcount); -+ job->vm = panthor_vm_get(vm); -+ -+ ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm); -+ if (ret) -+ goto err_put_job; -+ -+ return &job->base; -+ -+err_put_job: -+ panthor_vm_bind_job_put(&job->base); -+ return ERR_PTR(ret); -+} -+ -+/** -+ * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs -+ * @exec: The locking/preparation context. -+ * @sched_job: The job to prepare resvs on. -+ * -+ * Locks and prepare the VM resv. -+ * -+ * If this is a map operation, locks and prepares the GEM resv. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, -+ struct drm_sched_job *sched_job) -+{ -+ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); -+ int ret; -+ -+ /* Acquire the VM lock an reserve a slot for this VM bind job. */ -+ ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1); -+ if (ret) -+ return ret; -+ -+ if (job->ctx.map.vm_bo) { -+ /* Lock/prepare the GEM being mapped. */ -+ ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job -+ * @exec: drm_exec context. -+ * @sched_job: Job to update the resvs on. -+ */ -+void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, -+ struct drm_sched_job *sched_job) -+{ -+ struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); -+ -+ /* Explicit sync => we just register our job finished fence as bookkeep. */ -+ drm_gpuvm_resv_add_fence(&job->vm->base, exec, -+ &sched_job->s_fence->finished, -+ DMA_RESV_USAGE_BOOKKEEP, -+ DMA_RESV_USAGE_BOOKKEEP); -+} -+ -+void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, -+ struct dma_fence *fence, -+ enum dma_resv_usage private_usage, -+ enum dma_resv_usage extobj_usage) -+{ -+ drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage); -+} -+ -+/** -+ * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously. -+ * @file: File. -+ * @vm: VM targeted by the VM operation. -+ * @op: Data describing the VM operation. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_bind_exec_sync_op(struct drm_file *file, -+ struct panthor_vm *vm, -+ struct drm_panthor_vm_bind_op *op) -+{ -+ struct panthor_vm_op_ctx op_ctx; -+ int ret; -+ -+ /* No sync objects allowed on synchronous operations. */ -+ if (op->syncs.count) -+ return -EINVAL; -+ -+ if (!op->size) -+ return 0; -+ -+ ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx); -+ if (ret) -+ return ret; -+ -+ ret = panthor_vm_exec_op(vm, &op_ctx, false); -+ panthor_vm_cleanup_op_ctx(&op_ctx, vm); -+ -+ return ret; -+} -+ -+/** -+ * panthor_vm_map_bo_range() - Map a GEM object range to a VM -+ * @vm: VM to map the GEM to. -+ * @bo: GEM object to map. -+ * @offset: Offset in the GEM object. -+ * @size: Size to map. -+ * @va: Virtual address to map the object to. -+ * @flags: Combination of drm_panthor_vm_bind_op_flags flags. -+ * Only map-related flags are valid. -+ * -+ * Internal use only. For userspace requests, use -+ * panthor_vm_bind_exec_sync_op() instead. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, -+ u64 offset, u64 size, u64 va, u32 flags) -+{ -+ struct panthor_vm_op_ctx op_ctx; -+ int ret; -+ -+ ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags); -+ if (ret) -+ return ret; -+ -+ ret = panthor_vm_exec_op(vm, &op_ctx, false); -+ panthor_vm_cleanup_op_ctx(&op_ctx, vm); -+ -+ return ret; -+} -+ -+/** -+ * panthor_vm_unmap_range() - Unmap a portion of the VA space -+ * @vm: VM to unmap the region from. -+ * @va: Virtual address to unmap. Must be 4k aligned. -+ * @size: Size of the region to unmap. Must be 4k aligned. -+ * -+ * Internal use only. For userspace requests, use -+ * panthor_vm_bind_exec_sync_op() instead. 
-+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) -+{ -+ struct panthor_vm_op_ctx op_ctx; -+ int ret; -+ -+ ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size); -+ if (ret) -+ return ret; -+ -+ ret = panthor_vm_exec_op(vm, &op_ctx, false); -+ panthor_vm_cleanup_op_ctx(&op_ctx, vm); -+ -+ return ret; -+} -+ -+/** -+ * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. -+ * @exec: Locking/preparation context. -+ * @vm: VM targeted by the GPU job. -+ * @slot_count: Number of slots to reserve. -+ * -+ * GPU jobs assume all BOs bound to the VM at the time the job is submitted -+ * are available when the job is executed. In order to guarantee that, we -+ * need to reserve a slot on all BOs mapped to a VM and update this slot with -+ * the job fence after its submission. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, -+ u32 slot_count) -+{ -+ int ret; -+ -+ /* Acquire the VM lock and reserve a slot for this GPU job. */ -+ ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count); -+ if (ret) -+ return ret; -+ -+ return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count); -+} -+ -+/** -+ * panthor_mmu_unplug() - Unplug the MMU logic -+ * @ptdev: Device. -+ * -+ * No access to the MMU regs should be done after this function is called. -+ * We suspend the IRQ and disable all VMs to guarantee that. -+ */ -+void panthor_mmu_unplug(struct panthor_device *ptdev) -+{ -+ panthor_mmu_irq_suspend(&ptdev->mmu->irq); -+ -+ mutex_lock(&ptdev->mmu->as.slots_lock); -+ for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { -+ struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; -+ -+ if (vm) { -+ drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); -+ panthor_vm_release_as_locked(vm); -+ } -+ } -+ mutex_unlock(&ptdev->mmu->as.slots_lock); -+} -+ -+static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) -+{ -+ destroy_workqueue(res); -+} -+ -+/** -+ * panthor_mmu_init() - Initialize the MMU logic. -+ * @ptdev: Device. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_mmu_init(struct panthor_device *ptdev) -+{ -+ u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); -+ struct panthor_mmu *mmu; -+ int ret, irq; -+ -+ mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL); -+ if (!mmu) -+ return -ENOMEM; -+ -+ INIT_LIST_HEAD(&mmu->as.lru_list); -+ -+ ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock); -+ if (ret) -+ return ret; -+ -+ INIT_LIST_HEAD(&mmu->vm.list); -+ ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); -+ if (ret) -+ return ret; -+ -+ ptdev->mmu = mmu; -+ -+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu"); -+ if (irq <= 0) -+ return -ENODEV; -+ -+ ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq, -+ panthor_mmu_fault_mask(ptdev, ~0)); -+ if (ret) -+ return ret; -+ -+ mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0); -+ if (!mmu->vm.wq) -+ return -ENOMEM; -+ -+ /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, -+ * which passes iova as an unsigned long. Patch the mmu_features to reflect this -+ * limitation. 
-+ */ -+ if (sizeof(unsigned long) * 8 < va_bits) { -+ ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); -+ ptdev->gpu_info.mmu_features |= sizeof(unsigned long) * 8; -+ } -+ -+ return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); -+} -+ -+#ifdef CONFIG_DEBUG_FS -+static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) -+{ -+ int ret; -+ -+ mutex_lock(&vm->op_lock); -+ ret = drm_debugfs_gpuva_info(m, &vm->base); -+ mutex_unlock(&vm->op_lock); -+ -+ return ret; -+} -+ -+static int show_each_vm(struct seq_file *m, void *arg) -+{ -+ struct drm_info_node *node = (struct drm_info_node *)m->private; -+ struct drm_device *ddev = node->minor->dev; -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; -+ struct panthor_vm *vm; -+ int ret = 0; -+ -+ mutex_lock(&ptdev->mmu->vm.lock); -+ list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { -+ ret = show(vm, m); -+ if (ret < 0) -+ break; -+ -+ seq_puts(m, "\n"); -+ } -+ mutex_unlock(&ptdev->mmu->vm.lock); -+ -+ return ret; -+} -+ -+static struct drm_info_list panthor_mmu_debugfs_list[] = { -+ DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), -+}; -+ -+/** -+ * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries -+ * @minor: Minor. -+ */ -+void panthor_mmu_debugfs_init(struct drm_minor *minor) -+{ -+ drm_debugfs_create_files(panthor_mmu_debugfs_list, -+ ARRAY_SIZE(panthor_mmu_debugfs_list), -+ minor->debugfs_root, minor); -+} -+#endif /* CONFIG_DEBUG_FS */ -+ -+/** -+ * panthor_mmu_pt_cache_init() - Initialize the page table cache. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+int panthor_mmu_pt_cache_init(void) -+{ -+ pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL); -+ if (!pt_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+/** -+ * panthor_mmu_pt_cache_fini() - Destroy the page table cache. -+ */ -+void panthor_mmu_pt_cache_fini(void) -+{ -+ kmem_cache_destroy(pt_cache); -+} -diff --git a/drivers/gpu/drm/panthor/panthor_mmu.h b/drivers/gpu/drm/panthor/panthor_mmu.h -new file mode 100644 -index 000000000000..f3c1ed19f973 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_mmu.h -@@ -0,0 +1,102 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2019 Linaro, Ltd, Rob Herring */ -+/* Copyright 2023 Collabora ltd. 
*/ -+ -+#ifndef __PANTHOR_MMU_H__ -+#define __PANTHOR_MMU_H__ -+ -+#include -+ -+struct drm_exec; -+struct drm_sched_job; -+struct panthor_gem_object; -+struct panthor_heap_pool; -+struct panthor_vm; -+struct panthor_vma; -+struct panthor_mmu; -+ -+int panthor_mmu_init(struct panthor_device *ptdev); -+void panthor_mmu_unplug(struct panthor_device *ptdev); -+void panthor_mmu_pre_reset(struct panthor_device *ptdev); -+void panthor_mmu_post_reset(struct panthor_device *ptdev); -+void panthor_mmu_suspend(struct panthor_device *ptdev); -+void panthor_mmu_resume(struct panthor_device *ptdev); -+ -+int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, -+ u64 offset, u64 size, u64 va, u32 flags); -+int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size); -+struct panthor_gem_object * -+panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset); -+ -+int panthor_vm_active(struct panthor_vm *vm); -+void panthor_vm_idle(struct panthor_vm *vm); -+int panthor_vm_as(struct panthor_vm *vm); -+ -+struct panthor_heap_pool * -+panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create); -+ -+struct panthor_vm *panthor_vm_get(struct panthor_vm *vm); -+void panthor_vm_put(struct panthor_vm *vm); -+struct panthor_vm *panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, -+ u64 kernel_va_start, u64 kernel_va_size, -+ u64 kernel_auto_va_start, -+ u64 kernel_auto_va_size); -+ -+int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, -+ struct panthor_vm *vm, -+ u32 slot_count); -+int panthor_vm_add_bos_resvs_deps_to_job(struct panthor_vm *vm, -+ struct drm_sched_job *job); -+void panthor_vm_add_job_fence_to_bos_resvs(struct panthor_vm *vm, -+ struct drm_sched_job *job); -+ -+struct dma_resv *panthor_vm_resv(struct panthor_vm *vm); -+struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm); -+ -+void panthor_vm_pool_destroy(struct panthor_file *pfile); -+int panthor_vm_pool_create(struct panthor_file *pfile); -+int panthor_vm_pool_create_vm(struct panthor_device *ptdev, -+ struct panthor_vm_pool *pool, -+ struct drm_panthor_vm_create *args); -+int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle); -+struct panthor_vm *panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle); -+ -+bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm); -+bool panthor_vm_is_unusable(struct panthor_vm *vm); -+ -+/* -+ * PANTHOR_VM_KERNEL_AUTO_VA: Use this magic address when you want the GEM -+ * logic to auto-allocate the virtual address in the reserved kernel VA range. 
-+ */ -+#define PANTHOR_VM_KERNEL_AUTO_VA ~0ull -+ -+int panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size, -+ struct drm_mm_node *va_node); -+void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node); -+ -+int panthor_vm_bind_exec_sync_op(struct drm_file *file, -+ struct panthor_vm *vm, -+ struct drm_panthor_vm_bind_op *op); -+ -+struct drm_sched_job * -+panthor_vm_bind_job_create(struct drm_file *file, -+ struct panthor_vm *vm, -+ const struct drm_panthor_vm_bind_op *op); -+void panthor_vm_bind_job_put(struct drm_sched_job *job); -+int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, -+ struct drm_sched_job *job); -+void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); -+ -+void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, -+ struct dma_fence *fence, -+ enum dma_resv_usage private_usage, -+ enum dma_resv_usage extobj_usage); -+ -+int panthor_mmu_pt_cache_init(void); -+void panthor_mmu_pt_cache_fini(void); -+ -+#ifdef CONFIG_DEBUG_FS -+void panthor_mmu_debugfs_init(struct drm_minor *minor); -+#endif -+ -+#endif --- -2.42.0 - - -From 0555d01c2719ef1d698339eb9b21ec604264781c Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:39 +0100 -Subject: [PATCH 58/81] drm/panthor: Add the FW logical block - -Contains everything that's FW related, that includes the code dealing -with the microcontroller unit (MCU) that's running the FW, and anything -related to allocating memory shared between the FW and the CPU. - -A few global FW events are processed in the IRQ handler, the rest is -forwarded to the scheduler, since scheduling is the primary reason for -the FW existence, and also the main source of FW <-> kernel -interactions. - -v4: -- Add a MODULE_FIRMWARE() entry for gen 10.8 -- Fix a wrong return ERR_PTR() in panthor_fw_load_section_entry() -- Fix typos -- Add Steve's R-b - -v3: -- Make the FW path more future-proof (Liviu) -- Use one waitqueue for all FW events -- Simplify propagation of FW events to the scheduler logic -- Drop the panthor_fw_mem abstraction and use panthor_kernel_bo instead -- Account for the panthor_vm changes -- Replace magic number with 0x7fffffff with ~0 to better signify that - it's the maximum permitted value. -- More accurate rounding when computing the firmware timeout. -- Add a 'sub iterator' helper function. This also adds a check that a - firmware entry doesn't overflow the firmware image. -- Drop __packed from FW structures, natural alignment is good enough. -- Other minor code improvements. - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-9-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_fw.c | 1336 ++++++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_fw.h | 504 ++++++++++ - 2 files changed, 1840 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_fw.c - create mode 100644 drivers/gpu/drm/panthor/panthor_fw.h - -diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c -new file mode 100644 -index 000000000000..d09614ae3c15 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_fw.c -@@ -0,0 +1,1336 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2023 Collabora ltd. 
*/ -+ -+#ifdef CONFIG_ARM_ARCH_TIMER -+#include -+#endif -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "panthor_device.h" -+#include "panthor_gem.h" -+#include "panthor_gpu.h" -+#include "panthor_regs.h" -+#include "panthor_fw.h" -+#include "panthor_mmu.h" -+#include "panthor_sched.h" -+ -+#define CSF_FW_NAME "mali_csffw.bin" -+ -+#define PING_INTERVAL_MS 12000 -+#define PROGRESS_TIMEOUT_CYCLES (5ull * 500 * 1024 * 1024) -+#define PROGRESS_TIMEOUT_SCALE_SHIFT 10 -+#define IDLE_HYSTERESIS_US 800 -+#define PWROFF_HYSTERESIS_US 10000 -+ -+/** -+ * struct panthor_fw_binary_hdr - Firmware binary header. -+ */ -+struct panthor_fw_binary_hdr { -+ /** @magic: Magic value to check binary validity. */ -+ u32 magic; -+#define CSF_FW_BINARY_HEADER_MAGIC 0xc3f13a6e -+ -+ /** @minor: Minor FW version. */ -+ u8 minor; -+ -+ /** @major: Major FW version. */ -+ u8 major; -+#define CSF_FW_BINARY_HEADER_MAJOR_MAX 0 -+ -+ /** @padding1: MBZ. */ -+ u16 padding1; -+ -+ /** @version_hash: FW version hash. */ -+ u32 version_hash; -+ -+ /** @padding2: MBZ. */ -+ u32 padding2; -+ -+ /** @size: FW binary size. */ -+ u32 size; -+}; -+ -+/** -+ * enum panthor_fw_binary_entry_type - Firmware binary entry type -+ */ -+enum panthor_fw_binary_entry_type { -+ /** @CSF_FW_BINARY_ENTRY_TYPE_IFACE: Host <-> FW interface. */ -+ CSF_FW_BINARY_ENTRY_TYPE_IFACE = 0, -+ -+ /** @CSF_FW_BINARY_ENTRY_TYPE_CONFIG: FW config. */ -+ CSF_FW_BINARY_ENTRY_TYPE_CONFIG = 1, -+ -+ /** @CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: Unit-tests. */ -+ CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST = 2, -+ -+ /** @CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: Trace buffer interface. */ -+ CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER = 3, -+ -+ /** @CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: Timeline metadata interface. */ -+ CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA = 4, -+}; -+ -+#define CSF_FW_BINARY_ENTRY_TYPE(ehdr) ((ehdr) & 0xff) -+#define CSF_FW_BINARY_ENTRY_SIZE(ehdr) (((ehdr) >> 8) & 0xff) -+#define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) -+#define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) -+ -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30) -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31) -+ -+#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \ -+ (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \ -+ CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) -+ -+/** -+ * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary -+ */ -+struct panthor_fw_binary_section_entry_hdr { -+ /** @flags: Section flags. */ -+ u32 flags; -+ -+ /** @va: MCU virtual range to map this binary section to. */ -+ struct { -+ /** @start: Start address. */ -+ u32 start; -+ -+ /** @end: End address. */ -+ u32 end; -+ } va; -+ -+ /** @data: Data to initialize the FW section with. 
*/ -+ struct { -+ /** @start: Start offset in the FW binary. */ -+ u32 start; -+ -+ /** @end: End offset in the FW binary. */ -+ u32 end; -+ } data; -+}; -+ -+/** -+ * struct panthor_fw_binary_iter - Firmware binary iterator -+ * -+ * Used to parse a firmware binary. -+ */ -+struct panthor_fw_binary_iter { -+ /** @data: FW binary data. */ -+ const void *data; -+ -+ /** @size: FW binary size. */ -+ size_t size; -+ -+ /** @offset: Iterator offset. */ -+ size_t offset; -+}; -+ -+/** -+ * struct panthor_fw_section - FW section -+ */ -+struct panthor_fw_section { -+ /** @node: Used to keep track of FW sections. */ -+ struct list_head node; -+ -+ /** @flags: Section flags, as encoded in the FW binary. */ -+ u32 flags; -+ -+ /** @mem: Section memory. */ -+ struct panthor_kernel_bo *mem; -+ -+ /** -+ * @name: Name of the section, as specified in the binary. -+ * -+ * Can be NULL. -+ */ -+ const char *name; -+ -+ /** -+ * @data: Initial data copied to the FW memory. -+ * -+ * We keep data around so we can reload sections after a reset. -+ */ -+ struct { -+ /** @buf: Buffed used to store init data. */ -+ const void *buf; -+ -+ /** @size: Size of @buf in bytes. */ -+ size_t size; -+ } data; -+}; -+ -+#define CSF_MCU_SHARED_REGION_START 0x04000000ULL -+#define CSF_MCU_SHARED_REGION_SIZE 0x04000000ULL -+ -+#define MIN_CS_PER_CSG 8 -+#define MIN_CSGS 3 -+#define MAX_CSG_PRIO 0xf -+ -+#define CSF_IFACE_VERSION(major, minor, patch) \ -+ (((major) << 24) | ((minor) << 16) | (patch)) -+#define CSF_IFACE_VERSION_MAJOR(v) ((v) >> 24) -+#define CSF_IFACE_VERSION_MINOR(v) (((v) >> 16) & 0xff) -+#define CSF_IFACE_VERSION_PATCH(v) ((v) & 0xffff) -+ -+#define CSF_GROUP_CONTROL_OFFSET 0x1000 -+#define CSF_STREAM_CONTROL_OFFSET 0x40 -+#define CSF_UNPRESERVED_REG_COUNT 4 -+ -+/** -+ * struct panthor_fw_iface - FW interfaces -+ */ -+struct panthor_fw_iface { -+ /** @global: Global interface. */ -+ struct panthor_fw_global_iface global; -+ -+ /** @groups: Group slot interfaces. */ -+ struct panthor_fw_csg_iface groups[MAX_CSGS]; -+ -+ /** @streams: Command stream slot interfaces. */ -+ struct panthor_fw_cs_iface streams[MAX_CSGS][MAX_CS_PER_CSG]; -+}; -+ -+/** -+ * struct panthor_fw - Firmware management -+ */ -+struct panthor_fw { -+ /** @vm: MCU VM. */ -+ struct panthor_vm *vm; -+ -+ /** @sections: List of FW sections. */ -+ struct list_head sections; -+ -+ /** @shared_section: The section containing the FW interfaces. */ -+ struct panthor_fw_section *shared_section; -+ -+ /** @iface: FW interfaces. */ -+ struct panthor_fw_iface iface; -+ -+ /** @watchdog: Collection of fields relating to the FW watchdog. */ -+ struct { -+ /** @ping_work: Delayed work used to ping the FW. */ -+ struct delayed_work ping_work; -+ } watchdog; -+ -+ /** -+ * @req_waitqueue: FW request waitqueue. -+ * -+ * Everytime a request is sent to a command stream group or the global -+ * interface, the caller will first busy wait for the request to be -+ * acknowledged, and then fallback to a sleeping wait. -+ * -+ * This wait queue is here to support the sleeping wait flavor. -+ */ -+ wait_queue_head_t req_waitqueue; -+ -+ /** @booted: True is the FW is booted */ -+ bool booted; -+ -+ /** -+ * @fast_reset: True if the post_reset logic can proceed with a fast reset. -+ * -+ * A fast reset is just a reset where the driver doesn't reload the FW sections. -+ * -+ * Any time the firmware is properly suspended, a fast reset can take place. 
-+ * On the other hand, if the halt operation failed, the driver will reload -+ * all sections to make sure we start from a fresh state. -+ */ -+ bool fast_reset; -+ -+ /** @irq: Job irq data. */ -+ struct panthor_irq irq; -+}; -+ -+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev) -+{ -+ return ptdev->fw->vm; -+} -+ -+/** -+ * panthor_fw_get_glb_iface() - Get the global interface -+ * @ptdev: Device. -+ * -+ * Return: The global interface. -+ */ -+struct panthor_fw_global_iface * -+panthor_fw_get_glb_iface(struct panthor_device *ptdev) -+{ -+ return &ptdev->fw->iface.global; -+} -+ -+/** -+ * panthor_fw_get_csg_iface() - Get a command stream group slot interface -+ * @ptdev: Device. -+ * @csg_slot: Index of the command stream group slot. -+ * -+ * Return: The command stream group slot interface. -+ */ -+struct panthor_fw_csg_iface * -+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot) -+{ -+ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS)) -+ return NULL; -+ -+ return &ptdev->fw->iface.groups[csg_slot]; -+} -+ -+/** -+ * panthor_fw_get_cs_iface() - Get a command stream slot interface -+ * @ptdev: Device. -+ * @csg_slot: Index of the command stream group slot. -+ * @cs_slot: Index of the command stream slot. -+ * -+ * Return: The command stream slot interface. -+ */ -+struct panthor_fw_cs_iface * -+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot) -+{ -+ if (drm_WARN_ON(&ptdev->base, csg_slot >= MAX_CSGS || cs_slot > MAX_CS_PER_CSG)) -+ return NULL; -+ -+ return &ptdev->fw->iface.streams[csg_slot][cs_slot]; -+} -+ -+/** -+ * panthor_fw_conv_timeout() - Convert a timeout into a cycle-count -+ * @ptdev: Device. -+ * @timeout_us: Timeout expressed in micro-seconds. -+ * -+ * The FW has two timer sources: the GPU counter or arch-timer. We need -+ * to express timeouts in term of number of cycles and specify which -+ * timer source should be used. -+ * -+ * Return: A value suitable for timeout fields in the global interface. -+ */ -+static u32 panthor_fw_conv_timeout(struct panthor_device *ptdev, u32 timeout_us) -+{ -+ bool use_cycle_counter = false; -+ u32 timer_rate = 0; -+ u64 mod_cycles; -+ -+#ifdef CONFIG_ARM_ARCH_TIMER -+ timer_rate = arch_timer_get_cntfrq(); -+#endif -+ -+ if (!timer_rate) { -+ use_cycle_counter = true; -+ timer_rate = clk_get_rate(ptdev->clks.core); -+ } -+ -+ if (drm_WARN_ON(&ptdev->base, !timer_rate)) { -+ /* We couldn't get a valid clock rate, let's just pick the -+ * maximum value so the FW still handles the core -+ * power on/off requests. -+ */ -+ return GLB_TIMER_VAL(~0) | -+ GLB_TIMER_SOURCE_GPU_COUNTER; -+ } -+ -+ mod_cycles = DIV_ROUND_UP_ULL((u64)timeout_us * timer_rate, -+ 1000000ull << 10); -+ if (drm_WARN_ON(&ptdev->base, mod_cycles > GLB_TIMER_VAL(~0))) -+ mod_cycles = GLB_TIMER_VAL(~0); -+ -+ return GLB_TIMER_VAL(mod_cycles) | -+ (use_cycle_counter ? 
GLB_TIMER_SOURCE_GPU_COUNTER : 0); -+} -+ -+static int panthor_fw_binary_iter_read(struct panthor_device *ptdev, -+ struct panthor_fw_binary_iter *iter, -+ void *out, size_t size) -+{ -+ size_t new_offset = iter->offset + size; -+ -+ if (new_offset > iter->size || new_offset < iter->offset) { -+ drm_err(&ptdev->base, "Firmware too small\n"); -+ return -EINVAL; -+ } -+ -+ memcpy(out, iter->data + iter->offset, size); -+ iter->offset = new_offset; -+ return 0; -+} -+ -+static int panthor_fw_binary_sub_iter_init(struct panthor_device *ptdev, -+ struct panthor_fw_binary_iter *iter, -+ struct panthor_fw_binary_iter *sub_iter, -+ size_t size) -+{ -+ size_t new_offset = iter->offset + size; -+ -+ if (new_offset > iter->size || new_offset < iter->offset) { -+ drm_err(&ptdev->base, "Firmware entry too long\n"); -+ return -EINVAL; -+ } -+ -+ sub_iter->offset = 0; -+ sub_iter->data = iter->data + iter->offset; -+ sub_iter->size = size; -+ iter->offset = new_offset; -+ return 0; -+} -+ -+static void panthor_fw_init_section_mem(struct panthor_device *ptdev, -+ struct panthor_fw_section *section) -+{ -+ bool was_mapped = !!section->mem->kmap; -+ int ret; -+ -+ if (!section->data.size && -+ !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)) -+ return; -+ -+ ret = panthor_kernel_bo_vmap(section->mem); -+ if (drm_WARN_ON(&ptdev->base, ret)) -+ return; -+ -+ memcpy(section->mem->kmap, section->data.buf, section->data.size); -+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) { -+ memset(section->mem->kmap + section->data.size, 0, -+ panthor_kernel_bo_size(section->mem) - section->data.size); -+ } -+ -+ if (!was_mapped) -+ panthor_kernel_bo_vunmap(section->mem); -+} -+ -+/** -+ * panthor_fw_alloc_queue_iface_mem() - Allocate a ring-buffer interfaces. -+ * @ptdev: Device. -+ * @input: Pointer holding the input interface on success. -+ * Should be ignored on failure. -+ * @output: Pointer holding the output interface on success. -+ * Should be ignored on failure. -+ * @input_fw_va: Pointer holding the input interface FW VA on success. -+ * Should be ignored on failure. -+ * @output_fw_va: Pointer holding the output interface FW VA on success. -+ * Should be ignored on failure. -+ * -+ * Allocates panthor_fw_ringbuf_{input,out}_iface interfaces. The input -+ * interface is at offset 0, and the output interface at offset 4096. -+ * -+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. -+ */ -+struct panthor_kernel_bo * -+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, -+ struct panthor_fw_ringbuf_input_iface **input, -+ const struct panthor_fw_ringbuf_output_iface **output, -+ u32 *input_fw_va, u32 *output_fw_va) -+{ -+ struct panthor_kernel_bo *mem; -+ int ret; -+ -+ mem = panthor_kernel_bo_create(ptdev, ptdev->fw->vm, SZ_8K, -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+ if (IS_ERR(mem)) -+ return mem; -+ -+ ret = panthor_kernel_bo_vmap(mem); -+ if (ret) { -+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), mem); -+ return ERR_PTR(ret); -+ } -+ -+ memset(mem->kmap, 0, panthor_kernel_bo_size(mem)); -+ *input = mem->kmap; -+ *output = mem->kmap + SZ_4K; -+ *input_fw_va = panthor_kernel_bo_gpuva(mem); -+ *output_fw_va = *input_fw_va + SZ_4K; -+ -+ return mem; -+} -+ -+/** -+ * panthor_fw_alloc_suspend_buf_mem() - Allocate a suspend buffer for a command stream group. -+ * @ptdev: Device. -+ * @size: Size of the suspend buffer. 
-+ * -+ * Return: A valid pointer in case of success, an ERR_PTR() otherwise. -+ */ -+struct panthor_kernel_bo * -+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size) -+{ -+ return panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), size, -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+} -+ -+static int panthor_fw_load_section_entry(struct panthor_device *ptdev, -+ const struct firmware *fw, -+ struct panthor_fw_binary_iter *iter, -+ u32 ehdr) -+{ -+ struct panthor_fw_binary_section_entry_hdr hdr; -+ struct panthor_fw_section *section; -+ u32 section_size; -+ u32 name_len; -+ int ret; -+ -+ ret = panthor_fw_binary_iter_read(ptdev, iter, &hdr, sizeof(hdr)); -+ if (ret) -+ return ret; -+ -+ if (hdr.data.end < hdr.data.start) { -+ drm_err(&ptdev->base, "Firmware corrupted, data.end < data.start (0x%x < 0x%x)\n", -+ hdr.data.end, hdr.data.start); -+ return -EINVAL; -+ } -+ -+ if (hdr.va.end < hdr.va.start) { -+ drm_err(&ptdev->base, "Firmware corrupted, hdr.va.end < hdr.va.start (0x%x < 0x%x)\n", -+ hdr.va.end, hdr.va.start); -+ return -EINVAL; -+ } -+ -+ if (hdr.data.end > fw->size) { -+ drm_err(&ptdev->base, "Firmware corrupted, file truncated? data_end=0x%x > fw size=0x%zx\n", -+ hdr.data.end, fw->size); -+ return -EINVAL; -+ } -+ -+ if ((hdr.va.start & ~PAGE_MASK) != 0 || -+ (hdr.va.end & ~PAGE_MASK) != 0) { -+ drm_err(&ptdev->base, "Firmware corrupted, virtual addresses not page aligned: 0x%x-0x%x\n", -+ hdr.va.start, hdr.va.end); -+ return -EINVAL; -+ } -+ -+ if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { -+ drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", -+ hdr.flags); -+ return -EINVAL; -+ } -+ -+ if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { -+ drm_warn(&ptdev->base, -+ "Firmware protected mode entry not be supported, ignoring"); -+ return 0; -+ } -+ -+ if (hdr.va.start == CSF_MCU_SHARED_REGION_START && -+ !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { -+ drm_err(&ptdev->base, -+ "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); -+ return -EINVAL; -+ } -+ -+ name_len = iter->size - iter->offset; -+ -+ section = drmm_kzalloc(&ptdev->base, sizeof(*section), GFP_KERNEL); -+ if (!section) -+ return -ENOMEM; -+ -+ list_add_tail(§ion->node, &ptdev->fw->sections); -+ section->flags = hdr.flags; -+ section->data.size = hdr.data.end - hdr.data.start; -+ -+ if (section->data.size > 0) { -+ void *data = drmm_kmalloc(&ptdev->base, section->data.size, GFP_KERNEL); -+ -+ if (!data) -+ return -ENOMEM; -+ -+ memcpy(data, fw->data + hdr.data.start, section->data.size); -+ section->data.buf = data; -+ } -+ -+ if (name_len > 0) { -+ char *name = drmm_kmalloc(&ptdev->base, name_len + 1, GFP_KERNEL); -+ -+ if (!name) -+ return -ENOMEM; -+ -+ memcpy(name, iter->data + iter->offset, name_len); -+ name[name_len] = '\0'; -+ section->name = name; -+ } -+ -+ section_size = hdr.va.end - hdr.va.start; -+ if (section_size) { -+ u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; -+ struct panthor_gem_object *bo; -+ u32 vm_map_flags = 0; -+ struct sg_table *sgt; -+ u64 va = hdr.va.start; -+ -+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) -+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; -+ -+ if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) -+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; -+ -+ /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to -+ * non-cacheable for now. 
We might want to introduce a new -+ * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device -+ * memory and is currently not used by our driver) for -+ * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit -+ * of IO-coherent systems. -+ */ -+ if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) -+ vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; -+ -+ section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), -+ section_size, -+ DRM_PANTHOR_BO_NO_MMAP, -+ vm_map_flags, va); -+ if (IS_ERR(section->mem)) -+ return PTR_ERR(section->mem); -+ -+ if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) -+ return -EINVAL; -+ -+ if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { -+ ret = panthor_kernel_bo_vmap(section->mem); -+ if (ret) -+ return ret; -+ } -+ -+ panthor_fw_init_section_mem(ptdev, section); -+ -+ bo = to_panthor_bo(section->mem->obj); -+ sgt = drm_gem_shmem_get_pages_sgt(&bo->base); -+ if (IS_ERR(sgt)) -+ return PTR_ERR(sgt); -+ -+ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); -+ } -+ -+ if (hdr.va.start == CSF_MCU_SHARED_REGION_START) -+ ptdev->fw->shared_section = section; -+ -+ return 0; -+} -+ -+static void -+panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) -+{ -+ struct panthor_fw_section *section; -+ -+ list_for_each_entry(section, &ptdev->fw->sections, node) { -+ struct sg_table *sgt; -+ -+ if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) -+ continue; -+ -+ panthor_fw_init_section_mem(ptdev, section); -+ sgt = drm_gem_shmem_get_pages_sgt(&to_panthor_bo(section->mem->obj)->base); -+ if (!drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(sgt))) -+ dma_sync_sgtable_for_device(ptdev->base.dev, sgt, DMA_TO_DEVICE); -+ } -+} -+ -+static int panthor_fw_load_entry(struct panthor_device *ptdev, -+ const struct firmware *fw, -+ struct panthor_fw_binary_iter *iter) -+{ -+ struct panthor_fw_binary_iter eiter; -+ u32 ehdr; -+ int ret; -+ -+ ret = panthor_fw_binary_iter_read(ptdev, iter, &ehdr, sizeof(ehdr)); -+ if (ret) -+ return ret; -+ -+ if ((iter->offset % sizeof(u32)) || -+ (CSF_FW_BINARY_ENTRY_SIZE(ehdr) % sizeof(u32))) { -+ drm_err(&ptdev->base, "Firmware entry isn't 32 bit aligned, offset=0x%x size=0x%x\n", -+ (u32)(iter->offset - sizeof(u32)), CSF_FW_BINARY_ENTRY_SIZE(ehdr)); -+ return -EINVAL; -+ } -+ -+ if (panthor_fw_binary_sub_iter_init(ptdev, iter, &eiter, -+ CSF_FW_BINARY_ENTRY_SIZE(ehdr) - sizeof(ehdr))) -+ return -EINVAL; -+ -+ switch (CSF_FW_BINARY_ENTRY_TYPE(ehdr)) { -+ case CSF_FW_BINARY_ENTRY_TYPE_IFACE: -+ return panthor_fw_load_section_entry(ptdev, fw, &eiter, ehdr); -+ -+ /* FIXME: handle those entry types? 
*/ -+ case CSF_FW_BINARY_ENTRY_TYPE_CONFIG: -+ case CSF_FW_BINARY_ENTRY_TYPE_FUTF_TEST: -+ case CSF_FW_BINARY_ENTRY_TYPE_TRACE_BUFFER: -+ case CSF_FW_BINARY_ENTRY_TYPE_TIMELINE_METADATA: -+ return 0; -+ default: -+ break; -+ } -+ -+ if (ehdr & CSF_FW_BINARY_ENTRY_OPTIONAL) -+ return 0; -+ -+ drm_err(&ptdev->base, -+ "Unsupported non-optional entry type %u in firmware\n", -+ CSF_FW_BINARY_ENTRY_TYPE(ehdr)); -+ return -EINVAL; -+} -+ -+static int panthor_fw_load(struct panthor_device *ptdev) -+{ -+ const struct firmware *fw = NULL; -+ struct panthor_fw_binary_iter iter = {}; -+ struct panthor_fw_binary_hdr hdr; -+ char fw_path[128]; -+ int ret; -+ -+ snprintf(fw_path, sizeof(fw_path), "arm/mali/arch%d.%d/%s", -+ (u32)GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id), -+ (u32)GPU_ARCH_MINOR(ptdev->gpu_info.gpu_id), -+ CSF_FW_NAME); -+ -+ ret = request_firmware(&fw, fw_path, ptdev->base.dev); -+ if (ret) { -+ drm_err(&ptdev->base, "Failed to load firmware image '%s'\n", -+ CSF_FW_NAME); -+ return ret; -+ } -+ -+ iter.data = fw->data; -+ iter.size = fw->size; -+ ret = panthor_fw_binary_iter_read(ptdev, &iter, &hdr, sizeof(hdr)); -+ if (ret) -+ goto out; -+ -+ if (hdr.magic != CSF_FW_BINARY_HEADER_MAGIC) { -+ ret = -EINVAL; -+ drm_err(&ptdev->base, "Invalid firmware magic\n"); -+ goto out; -+ } -+ -+ if (hdr.major != CSF_FW_BINARY_HEADER_MAJOR_MAX) { -+ ret = -EINVAL; -+ drm_err(&ptdev->base, "Unsupported firmware binary header version %d.%d (expected %d.x)\n", -+ hdr.major, hdr.minor, CSF_FW_BINARY_HEADER_MAJOR_MAX); -+ goto out; -+ } -+ -+ if (hdr.size > iter.size) { -+ drm_err(&ptdev->base, "Firmware image is truncated\n"); -+ goto out; -+ } -+ -+ iter.size = hdr.size; -+ -+ while (iter.offset < hdr.size) { -+ ret = panthor_fw_load_entry(ptdev, fw, &iter); -+ if (ret) -+ goto out; -+ } -+ -+ if (!ptdev->fw->shared_section) { -+ drm_err(&ptdev->base, "Shared interface region not found\n"); -+ ret = -EINVAL; -+ goto out; -+ } -+ -+out: -+ release_firmware(fw); -+ return ret; -+} -+ -+/** -+ * iface_fw_to_cpu_addr() - Turn an MCU address into a CPU address -+ * @ptdev: Device. -+ * @mcu_va: MCU address. -+ * -+ * Return: NULL if the address is not part of the shared section, non-NULL otherwise. 
-+ */ -+static void *iface_fw_to_cpu_addr(struct panthor_device *ptdev, u32 mcu_va) -+{ -+ u64 shared_mem_start = panthor_kernel_bo_gpuva(ptdev->fw->shared_section->mem); -+ u64 shared_mem_end = shared_mem_start + -+ panthor_kernel_bo_size(ptdev->fw->shared_section->mem); -+ if (mcu_va < shared_mem_start || mcu_va >= shared_mem_end) -+ return NULL; -+ -+ return ptdev->fw->shared_section->mem->kmap + (mcu_va - shared_mem_start); -+} -+ -+static int panthor_init_cs_iface(struct panthor_device *ptdev, -+ unsigned int csg_idx, unsigned int cs_idx) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_idx); -+ struct panthor_fw_cs_iface *cs_iface = &ptdev->fw->iface.streams[csg_idx][cs_idx]; -+ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); -+ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + -+ (csg_idx * glb_iface->control->group_stride) + -+ CSF_STREAM_CONTROL_OFFSET + -+ (cs_idx * csg_iface->control->stream_stride); -+ struct panthor_fw_cs_iface *first_cs_iface = -+ panthor_fw_get_cs_iface(ptdev, 0, 0); -+ -+ if (iface_offset + sizeof(*cs_iface) >= shared_section_sz) -+ return -EINVAL; -+ -+ spin_lock_init(&cs_iface->lock); -+ cs_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; -+ cs_iface->input = iface_fw_to_cpu_addr(ptdev, cs_iface->control->input_va); -+ cs_iface->output = iface_fw_to_cpu_addr(ptdev, cs_iface->control->output_va); -+ -+ if (!cs_iface->input || !cs_iface->output) { -+ drm_err(&ptdev->base, "Invalid stream control interface input/output VA"); -+ return -EINVAL; -+ } -+ -+ if (cs_iface != first_cs_iface) { -+ if (cs_iface->control->features != first_cs_iface->control->features) { -+ drm_err(&ptdev->base, "Expecting identical CS slots"); -+ return -EINVAL; -+ } -+ } else { -+ u32 reg_count = CS_FEATURES_WORK_REGS(cs_iface->control->features); -+ -+ ptdev->csif_info.cs_reg_count = reg_count; -+ ptdev->csif_info.unpreserved_cs_reg_count = CSF_UNPRESERVED_REG_COUNT; -+ } -+ -+ return 0; -+} -+ -+static bool compare_csg(const struct panthor_fw_csg_control_iface *a, -+ const struct panthor_fw_csg_control_iface *b) -+{ -+ if (a->features != b->features) -+ return false; -+ if (a->suspend_size != b->suspend_size) -+ return false; -+ if (a->protm_suspend_size != b->protm_suspend_size) -+ return false; -+ if (a->stream_num != b->stream_num) -+ return false; -+ return true; -+} -+ -+static int panthor_init_csg_iface(struct panthor_device *ptdev, -+ unsigned int csg_idx) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ struct panthor_fw_csg_iface *csg_iface = &ptdev->fw->iface.groups[csg_idx]; -+ u64 shared_section_sz = panthor_kernel_bo_size(ptdev->fw->shared_section->mem); -+ u32 iface_offset = CSF_GROUP_CONTROL_OFFSET + (csg_idx * glb_iface->control->group_stride); -+ unsigned int i; -+ -+ if (iface_offset + sizeof(*csg_iface) >= shared_section_sz) -+ return -EINVAL; -+ -+ spin_lock_init(&csg_iface->lock); -+ csg_iface->control = ptdev->fw->shared_section->mem->kmap + iface_offset; -+ csg_iface->input = iface_fw_to_cpu_addr(ptdev, csg_iface->control->input_va); -+ csg_iface->output = iface_fw_to_cpu_addr(ptdev, csg_iface->control->output_va); -+ -+ if (csg_iface->control->stream_num < MIN_CS_PER_CSG || -+ csg_iface->control->stream_num > MAX_CS_PER_CSG) -+ return -EINVAL; -+ -+ if (!csg_iface->input || !csg_iface->output) { -+ drm_err(&ptdev->base, "Invalid group control interface 
input/output VA"); -+ return -EINVAL; -+ } -+ -+ if (csg_idx > 0) { -+ struct panthor_fw_csg_iface *first_csg_iface = -+ panthor_fw_get_csg_iface(ptdev, 0); -+ -+ if (!compare_csg(first_csg_iface->control, csg_iface->control)) { -+ drm_err(&ptdev->base, "Expecting identical CSG slots"); -+ return -EINVAL; -+ } -+ } -+ -+ for (i = 0; i < csg_iface->control->stream_num; i++) { -+ int ret = panthor_init_cs_iface(ptdev, csg_idx, i); -+ -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static u32 panthor_get_instr_features(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ -+ if (glb_iface->control->version < CSF_IFACE_VERSION(1, 1, 0)) -+ return 0; -+ -+ return glb_iface->control->instr_features; -+} -+ -+static int panthor_fw_init_ifaces(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = &ptdev->fw->iface.global; -+ unsigned int i; -+ -+ if (!ptdev->fw->shared_section->mem->kmap) -+ return -EINVAL; -+ -+ spin_lock_init(&glb_iface->lock); -+ glb_iface->control = ptdev->fw->shared_section->mem->kmap; -+ -+ if (!glb_iface->control->version) { -+ drm_err(&ptdev->base, "Firmware version is 0. Firmware may have failed to boot"); -+ return -EINVAL; -+ } -+ -+ glb_iface->input = iface_fw_to_cpu_addr(ptdev, glb_iface->control->input_va); -+ glb_iface->output = iface_fw_to_cpu_addr(ptdev, glb_iface->control->output_va); -+ if (!glb_iface->input || !glb_iface->output) { -+ drm_err(&ptdev->base, "Invalid global control interface input/output VA"); -+ return -EINVAL; -+ } -+ -+ if (glb_iface->control->group_num > MAX_CSGS || -+ glb_iface->control->group_num < MIN_CSGS) { -+ drm_err(&ptdev->base, "Invalid number of control groups"); -+ return -EINVAL; -+ } -+ -+ for (i = 0; i < glb_iface->control->group_num; i++) { -+ int ret = panthor_init_csg_iface(ptdev, i); -+ -+ if (ret) -+ return ret; -+ } -+ -+ drm_info(&ptdev->base, "CSF FW v%d.%d.%d, Features %#x Instrumentation features %#x", -+ CSF_IFACE_VERSION_MAJOR(glb_iface->control->version), -+ CSF_IFACE_VERSION_MINOR(glb_iface->control->version), -+ CSF_IFACE_VERSION_PATCH(glb_iface->control->version), -+ glb_iface->control->features, -+ panthor_get_instr_features(ptdev)); -+ return 0; -+} -+ -+static void panthor_fw_init_global_iface(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ -+ /* Enable all cores. */ -+ glb_iface->input->core_en_mask = ptdev->gpu_info.shader_present; -+ -+ /* Setup timers. */ -+ glb_iface->input->poweroff_timer = panthor_fw_conv_timeout(ptdev, PWROFF_HYSTERESIS_US); -+ glb_iface->input->progress_timer = PROGRESS_TIMEOUT_CYCLES >> PROGRESS_TIMEOUT_SCALE_SHIFT; -+ glb_iface->input->idle_timer = panthor_fw_conv_timeout(ptdev, IDLE_HYSTERESIS_US); -+ -+ /* Enable interrupts we care about. */ -+ glb_iface->input->ack_irq_mask = GLB_CFG_ALLOC_EN | -+ GLB_PING | -+ GLB_CFG_PROGRESS_TIMER | -+ GLB_CFG_POWEROFF_TIMER | -+ GLB_IDLE_EN | -+ GLB_IDLE; -+ -+ panthor_fw_update_reqs(glb_iface, req, GLB_IDLE_EN, GLB_IDLE_EN); -+ panthor_fw_toggle_reqs(glb_iface, req, ack, -+ GLB_CFG_ALLOC_EN | -+ GLB_CFG_POWEROFF_TIMER | -+ GLB_CFG_PROGRESS_TIMER); -+ -+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); -+ -+ /* Kick the watchdog. 
*/ -+ mod_delayed_work(ptdev->reset.wq, &ptdev->fw->watchdog.ping_work, -+ msecs_to_jiffies(PING_INTERVAL_MS)); -+} -+ -+static void panthor_job_irq_handler(struct panthor_device *ptdev, u32 status) -+{ -+ if (!ptdev->fw->booted && (status & JOB_INT_GLOBAL_IF)) -+ ptdev->fw->booted = true; -+ -+ wake_up_all(&ptdev->fw->req_waitqueue); -+ -+ /* If the FW is not booted, don't process IRQs, just flag the FW as booted. */ -+ if (!ptdev->fw->booted) -+ return; -+ -+ panthor_sched_report_fw_events(ptdev, status); -+} -+PANTHOR_IRQ_HANDLER(job, JOB, panthor_job_irq_handler); -+ -+static int panthor_fw_start(struct panthor_device *ptdev) -+{ -+ bool timedout = false; -+ -+ ptdev->fw->booted = false; -+ panthor_job_irq_resume(&ptdev->fw->irq, ~0); -+ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_AUTO); -+ -+ if (!wait_event_timeout(ptdev->fw->req_waitqueue, -+ ptdev->fw->booted, -+ msecs_to_jiffies(1000))) { -+ if (!ptdev->fw->booted && -+ !(gpu_read(ptdev, JOB_INT_STAT) & JOB_INT_GLOBAL_IF)) -+ timedout = true; -+ } -+ -+ if (timedout) { -+ drm_err(&ptdev->base, "Failed to boot MCU"); -+ return -ETIMEDOUT; -+ } -+ -+ return 0; -+} -+ -+static void panthor_fw_stop(struct panthor_device *ptdev) -+{ -+ u32 status; -+ -+ gpu_write(ptdev, MCU_CONTROL, MCU_CONTROL_DISABLE); -+ if (readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, -+ status == MCU_STATUS_DISABLED, 10, 100000)) -+ drm_err(&ptdev->base, "Failed to stop MCU"); -+} -+ -+/** -+ * panthor_fw_pre_reset() - Call before a reset. -+ * @ptdev: Device. -+ * @on_hang: true if the reset was triggered on a GPU hang. -+ * -+ * If the reset is not triggered on a hang, we try to gracefully halt the -+ * MCU, so we can do a fast-reset when panthor_fw_post_reset() is called. -+ */ -+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) -+{ -+ /* Make sure we won't be woken up by a ping. */ -+ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); -+ -+ ptdev->fw->fast_reset = false; -+ -+ if (!on_hang) { -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ u32 status; -+ -+ panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); -+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); -+ if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, -+ status == MCU_STATUS_HALT, 10, 100000) && -+ glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { -+ ptdev->fw->fast_reset = true; -+ } else { -+ drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); -+ } -+ -+ /* The FW detects 0 -> 1 transitions. Make sure we reset -+ * the HALT bit before the FW is rebooted. -+ */ -+ panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); -+ } -+ -+ panthor_job_irq_suspend(&ptdev->fw->irq); -+} -+ -+/** -+ * panthor_fw_post_reset() - Call after a reset. -+ * @ptdev: Device. -+ * -+ * Start the FW. If this is not a fast reset, all FW sections are reloaded to -+ * make sure we can recover from a memory corruption. -+ */ -+int panthor_fw_post_reset(struct panthor_device *ptdev) -+{ -+ int ret; -+ -+ /* Make the MCU VM active. */ -+ ret = panthor_vm_active(ptdev->fw->vm); -+ if (ret) -+ return ret; -+ -+ /* Reload all sections, including RO ones. We're not supposed -+ * to end up here anyway, let's just assume the overhead of -+ * reloading everything is acceptable. -+ */ -+ if (!ptdev->fw->fast_reset) -+ panthor_reload_fw_sections(ptdev, true); -+ -+ ret = panthor_fw_start(ptdev); -+ if (ret) -+ return ret; -+ -+ /* We must re-initialize the global interface even on fast-reset. 
*/ -+ panthor_fw_init_global_iface(ptdev); -+ return 0; -+} -+ -+/** -+ * panthor_fw_unplug() - Called when the device is unplugged. -+ * @ptdev: Device. -+ * -+ * This function must make sure all pending operations are flushed before -+ * will release device resources, thus preventing any interaction with -+ * the HW. -+ * -+ * If there is still FW-related work running after this function returns, -+ * they must use drm_dev_{enter,exit}() and skip any HW access when -+ * drm_dev_enter() returns false. -+ */ -+void panthor_fw_unplug(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_section *section; -+ -+ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); -+ -+ /* Make sure the IRQ handler can be called after that point. */ -+ if (ptdev->fw->irq.irq) -+ panthor_job_irq_suspend(&ptdev->fw->irq); -+ -+ panthor_fw_stop(ptdev); -+ -+ list_for_each_entry(section, &ptdev->fw->sections, node) -+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), section->mem); -+ -+ /* We intentionally don't call panthor_vm_idle() and let -+ * panthor_mmu_unplug() release the AS we acquired with -+ * panthor_vm_active() so we don't have to track the VM active/idle -+ * state to keep the active_refcnt balanced. -+ */ -+ panthor_vm_put(ptdev->fw->vm); -+ -+ panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); -+} -+ -+/** -+ * panthor_fw_wait_acks() - Wait for requests to be acknowledged by the FW. -+ * @req_ptr: Pointer to the req register. -+ * @ack_ptr: Pointer to the ack register. -+ * @wq: Wait queue to use for the sleeping wait. -+ * @req_mask: Mask of requests to wait for. -+ * @acked: Pointer to field that's updated with the acked requests. -+ * If the function returns 0, *acked == req_mask. -+ * @timeout_ms: Timeout expressed in milliseconds. -+ * -+ * Return: 0 on success, -ETIMEDOUT otherwise. -+ */ -+static int panthor_fw_wait_acks(const u32 *req_ptr, const u32 *ack_ptr, -+ wait_queue_head_t *wq, -+ u32 req_mask, u32 *acked, -+ u32 timeout_ms) -+{ -+ u32 ack, req = READ_ONCE(*req_ptr) & req_mask; -+ int ret; -+ -+ /* Busy wait for a few µsecs before falling back to a sleeping wait. */ -+ *acked = req_mask; -+ ret = read_poll_timeout_atomic(READ_ONCE, ack, -+ (ack & req_mask) == req, -+ 0, 10, 0, -+ *ack_ptr); -+ if (!ret) -+ return 0; -+ -+ if (wait_event_timeout(*wq, (READ_ONCE(*ack_ptr) & req_mask) == req, -+ msecs_to_jiffies(timeout_ms))) -+ return 0; -+ -+ /* Check one last time, in case we were not woken up for some reason. */ -+ ack = READ_ONCE(*ack_ptr); -+ if ((ack & req_mask) == req) -+ return 0; -+ -+ *acked = ~(req ^ ack) & req_mask; -+ return -ETIMEDOUT; -+} -+ -+/** -+ * panthor_fw_glb_wait_acks() - Wait for global requests to be acknowledged. -+ * @ptdev: Device. -+ * @req_mask: Mask of requests to wait for. -+ * @acked: Pointer to field that's updated with the acked requests. -+ * If the function returns 0, *acked == req_mask. -+ * @timeout_ms: Timeout expressed in milliseconds. -+ * -+ * Return: 0 on success, -ETIMEDOUT otherwise. -+ */ -+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, -+ u32 req_mask, u32 *acked, -+ u32 timeout_ms) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ -+ /* GLB_HALT doesn't get acked through the FW interface. 
*/ -+ if (drm_WARN_ON(&ptdev->base, req_mask & (~GLB_REQ_MASK | GLB_HALT))) -+ return -EINVAL; -+ -+ return panthor_fw_wait_acks(&glb_iface->input->req, -+ &glb_iface->output->ack, -+ &ptdev->fw->req_waitqueue, -+ req_mask, acked, timeout_ms); -+} -+ -+/** -+ * panthor_fw_csg_wait_acks() - Wait for command stream group requests to be acknowledged. -+ * @ptdev: Device. -+ * @csg_slot: CSG slot ID. -+ * @req_mask: Mask of requests to wait for. -+ * @acked: Pointer to field that's updated with the acked requests. -+ * If the function returns 0, *acked == req_mask. -+ * @timeout_ms: Timeout expressed in milliseconds. -+ * -+ * Return: 0 on success, -ETIMEDOUT otherwise. -+ */ -+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_slot, -+ u32 req_mask, u32 *acked, u32 timeout_ms) -+{ -+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, csg_slot); -+ int ret; -+ -+ if (drm_WARN_ON(&ptdev->base, req_mask & ~CSG_REQ_MASK)) -+ return -EINVAL; -+ -+ ret = panthor_fw_wait_acks(&csg_iface->input->req, -+ &csg_iface->output->ack, -+ &ptdev->fw->req_waitqueue, -+ req_mask, acked, timeout_ms); -+ -+ /* -+ * Check that all bits in the state field were updated, if any mismatch -+ * then clear all bits in the state field. This allows code to do -+ * (acked & CSG_STATE_MASK) and get the right value. -+ */ -+ -+ if ((*acked & CSG_STATE_MASK) != CSG_STATE_MASK) -+ *acked &= ~CSG_STATE_MASK; -+ -+ return ret; -+} -+ -+/** -+ * panthor_fw_ring_csg_doorbells() - Ring command stream group doorbells. -+ * @ptdev: Device. -+ * @csg_mask: Bitmask encoding the command stream group doorbells to ring. -+ * -+ * This function is toggling bits in the doorbell_req and ringing the -+ * global doorbell. It doesn't require a user doorbell to be attached to -+ * the group. -+ */ -+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_mask) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ -+ panthor_fw_toggle_reqs(glb_iface, doorbell_req, doorbell_ack, csg_mask); -+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); -+} -+ -+static void panthor_fw_ping_work(struct work_struct *work) -+{ -+ struct panthor_fw *fw = container_of(work, struct panthor_fw, watchdog.ping_work.work); -+ struct panthor_device *ptdev = fw->irq.ptdev; -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ u32 acked; -+ int ret; -+ -+ if (panthor_device_reset_is_pending(ptdev)) -+ return; -+ -+ panthor_fw_toggle_reqs(glb_iface, req, ack, GLB_PING); -+ gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); -+ -+ ret = panthor_fw_glb_wait_acks(ptdev, GLB_PING, &acked, 100); -+ if (ret) { -+ panthor_device_schedule_reset(ptdev); -+ drm_err(&ptdev->base, "FW ping timeout, scheduling a reset"); -+ } else { -+ mod_delayed_work(ptdev->reset.wq, &fw->watchdog.ping_work, -+ msecs_to_jiffies(PING_INTERVAL_MS)); -+ } -+} -+ -+/** -+ * panthor_fw_init() - Initialize FW related data. -+ * @ptdev: Device. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+int panthor_fw_init(struct panthor_device *ptdev) -+{ -+ struct panthor_fw *fw; -+ int ret, irq; -+ -+ fw = drmm_kzalloc(&ptdev->base, sizeof(*fw), GFP_KERNEL); -+ if (!fw) -+ return -ENOMEM; -+ -+ ptdev->fw = fw; -+ init_waitqueue_head(&fw->req_waitqueue); -+ INIT_LIST_HEAD(&fw->sections); -+ INIT_DELAYED_WORK(&fw->watchdog.ping_work, panthor_fw_ping_work); -+ -+ irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "job"); -+ if (irq <= 0) -+ return -ENODEV; -+ -+ ret = panthor_request_job_irq(ptdev, &fw->irq, irq, 0); -+ if (ret) { -+ drm_err(&ptdev->base, "failed to request job irq"); -+ return ret; -+ } -+ -+ ret = panthor_gpu_l2_power_on(ptdev); -+ if (ret) -+ return ret; -+ -+ fw->vm = panthor_vm_create(ptdev, true, -+ 0, SZ_4G, -+ CSF_MCU_SHARED_REGION_START, -+ CSF_MCU_SHARED_REGION_SIZE); -+ if (IS_ERR(fw->vm)) { -+ ret = PTR_ERR(fw->vm); -+ fw->vm = NULL; -+ goto err_unplug_fw; -+ } -+ -+ ret = panthor_fw_load(ptdev); -+ if (ret) -+ goto err_unplug_fw; -+ -+ ret = panthor_vm_active(fw->vm); -+ if (ret) -+ goto err_unplug_fw; -+ -+ ret = panthor_fw_start(ptdev); -+ if (ret) -+ goto err_unplug_fw; -+ -+ ret = panthor_fw_init_ifaces(ptdev); -+ if (ret) -+ goto err_unplug_fw; -+ -+ panthor_fw_init_global_iface(ptdev); -+ return 0; -+ -+err_unplug_fw: -+ panthor_fw_unplug(ptdev); -+ return ret; -+} -+ -+MODULE_FIRMWARE("arm/mali/arch10.8/mali_csffw.bin"); -diff --git a/drivers/gpu/drm/panthor/panthor_fw.h b/drivers/gpu/drm/panthor/panthor_fw.h -new file mode 100644 -index 000000000000..3bdba2eee29e ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_fw.h -@@ -0,0 +1,504 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2023 Collabora ltd. */ -+ -+#ifndef __PANTHOR_MCU_H__ -+#define __PANTHOR_MCU_H__ -+ -+#include -+ -+#include "panthor_device.h" -+ -+struct panthor_kernel_bo; -+ -+#define MAX_CSGS 31 -+#define MAX_CS_PER_CSG 32 -+ -+struct panthor_fw_ringbuf_input_iface { -+ u64 insert; -+ u64 extract; -+}; -+ -+struct panthor_fw_ringbuf_output_iface { -+ u64 extract; -+ u32 active; -+}; -+ -+struct panthor_fw_cs_control_iface { -+#define CS_FEATURES_WORK_REGS(x) (((x) & GENMASK(7, 0)) + 1) -+#define CS_FEATURES_SCOREBOARDS(x) (((x) & GENMASK(15, 8)) >> 8) -+#define CS_FEATURES_COMPUTE BIT(16) -+#define CS_FEATURES_FRAGMENT BIT(17) -+#define CS_FEATURES_TILER BIT(18) -+ u32 features; -+ u32 input_va; -+ u32 output_va; -+}; -+ -+struct panthor_fw_cs_input_iface { -+#define CS_STATE_MASK GENMASK(2, 0) -+#define CS_STATE_STOP 0 -+#define CS_STATE_START 1 -+#define CS_EXTRACT_EVENT BIT(4) -+#define CS_IDLE_SYNC_WAIT BIT(8) -+#define CS_IDLE_PROTM_PENDING BIT(9) -+#define CS_IDLE_EMPTY BIT(10) -+#define CS_IDLE_RESOURCE_REQ BIT(11) -+#define CS_TILER_OOM BIT(26) -+#define CS_PROTM_PENDING BIT(27) -+#define CS_FATAL BIT(30) -+#define CS_FAULT BIT(31) -+#define CS_REQ_MASK (CS_STATE_MASK | \ -+ CS_EXTRACT_EVENT | \ -+ CS_IDLE_SYNC_WAIT | \ -+ CS_IDLE_PROTM_PENDING | \ -+ CS_IDLE_EMPTY | \ -+ CS_IDLE_RESOURCE_REQ) -+#define CS_EVT_MASK (CS_TILER_OOM | \ -+ CS_PROTM_PENDING | \ -+ CS_FATAL | \ -+ CS_FAULT) -+ u32 req; -+ -+#define CS_CONFIG_PRIORITY(x) ((x) & GENMASK(3, 0)) -+#define CS_CONFIG_DOORBELL(x) (((x) << 8) & GENMASK(15, 8)) -+ u32 config; -+ u32 reserved1; -+ u32 ack_irq_mask; -+ u64 ringbuf_base; -+ u32 ringbuf_size; -+ u32 reserved2; -+ u64 heap_start; -+ u64 heap_end; -+ u64 ringbuf_input; -+ u64 ringbuf_output; -+ u32 instr_config; -+ u32 instrbuf_size; -+ u64 instrbuf_base; -+ u64 instrbuf_offset_ptr; -+}; -+ -+struct 
panthor_fw_cs_output_iface { -+ u32 ack; -+ u32 reserved1[15]; -+ u64 status_cmd_ptr; -+ -+#define CS_STATUS_WAIT_SB_MASK GENMASK(15, 0) -+#define CS_STATUS_WAIT_SB_SRC_MASK GENMASK(19, 16) -+#define CS_STATUS_WAIT_SB_SRC_NONE (0 << 16) -+#define CS_STATUS_WAIT_SB_SRC_WAIT (8 << 16) -+#define CS_STATUS_WAIT_SYNC_COND_LE (0 << 24) -+#define CS_STATUS_WAIT_SYNC_COND_GT (1 << 24) -+#define CS_STATUS_WAIT_SYNC_COND_MASK GENMASK(27, 24) -+#define CS_STATUS_WAIT_PROGRESS BIT(28) -+#define CS_STATUS_WAIT_PROTM BIT(29) -+#define CS_STATUS_WAIT_SYNC_64B BIT(30) -+#define CS_STATUS_WAIT_SYNC BIT(31) -+ u32 status_wait; -+ u32 status_req_resource; -+ u64 status_wait_sync_ptr; -+ u32 status_wait_sync_value; -+ u32 status_scoreboards; -+ -+#define CS_STATUS_BLOCKED_REASON_UNBLOCKED 0 -+#define CS_STATUS_BLOCKED_REASON_SB_WAIT 1 -+#define CS_STATUS_BLOCKED_REASON_PROGRESS_WAIT 2 -+#define CS_STATUS_BLOCKED_REASON_SYNC_WAIT 3 -+#define CS_STATUS_BLOCKED_REASON_DEFERRED 5 -+#define CS_STATUS_BLOCKED_REASON_RES 6 -+#define CS_STATUS_BLOCKED_REASON_FLUSH 7 -+#define CS_STATUS_BLOCKED_REASON_MASK GENMASK(3, 0) -+ u32 status_blocked_reason; -+ u32 status_wait_sync_value_hi; -+ u32 reserved2[6]; -+ -+#define CS_EXCEPTION_TYPE(x) ((x) & GENMASK(7, 0)) -+#define CS_EXCEPTION_DATA(x) (((x) >> 8) & GENMASK(23, 0)) -+ u32 fault; -+ u32 fatal; -+ u64 fault_info; -+ u64 fatal_info; -+ u32 reserved3[10]; -+ u32 heap_vt_start; -+ u32 heap_vt_end; -+ u32 reserved4; -+ u32 heap_frag_end; -+ u64 heap_address; -+}; -+ -+struct panthor_fw_csg_control_iface { -+ u32 features; -+ u32 input_va; -+ u32 output_va; -+ u32 suspend_size; -+ u32 protm_suspend_size; -+ u32 stream_num; -+ u32 stream_stride; -+}; -+ -+struct panthor_fw_csg_input_iface { -+#define CSG_STATE_MASK GENMASK(2, 0) -+#define CSG_STATE_TERMINATE 0 -+#define CSG_STATE_START 1 -+#define CSG_STATE_SUSPEND 2 -+#define CSG_STATE_RESUME 3 -+#define CSG_ENDPOINT_CONFIG BIT(4) -+#define CSG_STATUS_UPDATE BIT(5) -+#define CSG_SYNC_UPDATE BIT(28) -+#define CSG_IDLE BIT(29) -+#define CSG_DOORBELL BIT(30) -+#define CSG_PROGRESS_TIMER_EVENT BIT(31) -+#define CSG_REQ_MASK (CSG_STATE_MASK | \ -+ CSG_ENDPOINT_CONFIG | \ -+ CSG_STATUS_UPDATE) -+#define CSG_EVT_MASK (CSG_SYNC_UPDATE | \ -+ CSG_IDLE | \ -+ CSG_PROGRESS_TIMER_EVENT) -+ u32 req; -+ u32 ack_irq_mask; -+ -+ u32 doorbell_req; -+ u32 cs_irq_ack; -+ u32 reserved1[4]; -+ u64 allow_compute; -+ u64 allow_fragment; -+ u32 allow_other; -+ -+#define CSG_EP_REQ_COMPUTE(x) ((x) & GENMASK(7, 0)) -+#define CSG_EP_REQ_FRAGMENT(x) (((x) << 8) & GENMASK(15, 8)) -+#define CSG_EP_REQ_TILER(x) (((x) << 16) & GENMASK(19, 16)) -+#define CSG_EP_REQ_EXCL_COMPUTE BIT(20) -+#define CSG_EP_REQ_EXCL_FRAGMENT BIT(21) -+#define CSG_EP_REQ_PRIORITY(x) (((x) << 28) & GENMASK(31, 28)) -+#define CSG_EP_REQ_PRIORITY_MASK GENMASK(31, 28) -+ u32 endpoint_req; -+ u32 reserved2[2]; -+ u64 suspend_buf; -+ u64 protm_suspend_buf; -+ u32 config; -+ u32 iter_trace_config; -+}; -+ -+struct panthor_fw_csg_output_iface { -+ u32 ack; -+ u32 reserved1; -+ u32 doorbell_ack; -+ u32 cs_irq_req; -+ u32 status_endpoint_current; -+ u32 status_endpoint_req; -+ -+#define CSG_STATUS_STATE_IS_IDLE BIT(0) -+ u32 status_state; -+ u32 resource_dep; -+}; -+ -+struct panthor_fw_global_control_iface { -+ u32 version; -+ u32 features; -+ u32 input_va; -+ u32 output_va; -+ u32 group_num; -+ u32 group_stride; -+ u32 perfcnt_size; -+ u32 instr_features; -+}; -+ -+struct panthor_fw_global_input_iface { -+#define GLB_HALT BIT(0) -+#define GLB_CFG_PROGRESS_TIMER BIT(1) -+#define 
GLB_CFG_ALLOC_EN BIT(2) -+#define GLB_CFG_POWEROFF_TIMER BIT(3) -+#define GLB_PROTM_ENTER BIT(4) -+#define GLB_PERFCNT_EN BIT(5) -+#define GLB_PERFCNT_SAMPLE BIT(6) -+#define GLB_COUNTER_EN BIT(7) -+#define GLB_PING BIT(8) -+#define GLB_FWCFG_UPDATE BIT(9) -+#define GLB_IDLE_EN BIT(10) -+#define GLB_SLEEP BIT(12) -+#define GLB_INACTIVE_COMPUTE BIT(20) -+#define GLB_INACTIVE_FRAGMENT BIT(21) -+#define GLB_INACTIVE_TILER BIT(22) -+#define GLB_PROTM_EXIT BIT(23) -+#define GLB_PERFCNT_THRESHOLD BIT(24) -+#define GLB_PERFCNT_OVERFLOW BIT(25) -+#define GLB_IDLE BIT(26) -+#define GLB_DBG_CSF BIT(30) -+#define GLB_DBG_HOST BIT(31) -+#define GLB_REQ_MASK GENMASK(10, 0) -+#define GLB_EVT_MASK GENMASK(26, 20) -+ u32 req; -+ u32 ack_irq_mask; -+ u32 doorbell_req; -+ u32 reserved1; -+ u32 progress_timer; -+ -+#define GLB_TIMER_VAL(x) ((x) & GENMASK(30, 0)) -+#define GLB_TIMER_SOURCE_GPU_COUNTER BIT(31) -+ u32 poweroff_timer; -+ u64 core_en_mask; -+ u32 reserved2; -+ u32 perfcnt_as; -+ u64 perfcnt_base; -+ u32 perfcnt_extract; -+ u32 reserved3[3]; -+ u32 perfcnt_config; -+ u32 perfcnt_csg_select; -+ u32 perfcnt_fw_enable; -+ u32 perfcnt_csg_enable; -+ u32 perfcnt_csf_enable; -+ u32 perfcnt_shader_enable; -+ u32 perfcnt_tiler_enable; -+ u32 perfcnt_mmu_l2_enable; -+ u32 reserved4[8]; -+ u32 idle_timer; -+}; -+ -+enum panthor_fw_halt_status { -+ PANTHOR_FW_HALT_OK = 0, -+ PANTHOR_FW_HALT_ON_PANIC = 0x4e, -+ PANTHOR_FW_HALT_ON_WATCHDOG_EXPIRATION = 0x4f, -+}; -+ -+struct panthor_fw_global_output_iface { -+ u32 ack; -+ u32 reserved1; -+ u32 doorbell_ack; -+ u32 reserved2; -+ u32 halt_status; -+ u32 perfcnt_status; -+ u32 perfcnt_insert; -+}; -+ -+/** -+ * struct panthor_fw_cs_iface - Firmware command stream slot interface -+ */ -+struct panthor_fw_cs_iface { -+ /** -+ * @lock: Lock protecting access to the panthor_fw_cs_input_iface::req -+ * field. -+ * -+ * Needed so we can update the req field concurrently from the interrupt -+ * handler and the scheduler logic. -+ * -+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW -+ * interface sections are mapped uncached/write-combined right now, and -+ * using cmpxchg() on such mappings leads to SError faults. Revisit when -+ * we have 'SHARED' GPU mappings hooked up. -+ */ -+ spinlock_t lock; -+ -+ /** -+ * @control: Command stream slot control interface. -+ * -+ * Used to expose command stream slot properties. -+ * -+ * This interface is read-only. -+ */ -+ struct panthor_fw_cs_control_iface *control; -+ -+ /** -+ * @input: Command stream slot input interface. -+ * -+ * Used for host updates/events. -+ */ -+ struct panthor_fw_cs_input_iface *input; -+ -+ /** -+ * @output: Command stream slot output interface. -+ * -+ * Used for FW updates/events. -+ * -+ * This interface is read-only. -+ */ -+ const struct panthor_fw_cs_output_iface *output; -+}; -+ -+/** -+ * struct panthor_fw_csg_iface - Firmware command stream group slot interface -+ */ -+struct panthor_fw_csg_iface { -+ /** -+ * @lock: Lock protecting access to the panthor_fw_csg_input_iface::req -+ * field. -+ * -+ * Needed so we can update the req field concurrently from the interrupt -+ * handler and the scheduler logic. -+ * -+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW -+ * interface sections are mapped uncached/write-combined right now, and -+ * using cmpxchg() on such mappings leads to SError faults. Revisit when -+ * we have 'SHARED' GPU mappings hooked up. -+ */ -+ spinlock_t lock; -+ -+ /** -+ * @control: Command stream group slot control interface. 
-+ * -+ * Used to expose command stream group slot properties. -+ * -+ * This interface is read-only. -+ */ -+ const struct panthor_fw_csg_control_iface *control; -+ -+ /** -+ * @input: Command stream slot input interface. -+ * -+ * Used for host updates/events. -+ */ -+ struct panthor_fw_csg_input_iface *input; -+ -+ /** -+ * @output: Command stream group slot output interface. -+ * -+ * Used for FW updates/events. -+ * -+ * This interface is read-only. -+ */ -+ const struct panthor_fw_csg_output_iface *output; -+}; -+ -+/** -+ * struct panthor_fw_global_iface - Firmware global interface -+ */ -+struct panthor_fw_global_iface { -+ /** -+ * @lock: Lock protecting access to the panthor_fw_global_input_iface::req -+ * field. -+ * -+ * Needed so we can update the req field concurrently from the interrupt -+ * handler and the scheduler/FW management logic. -+ * -+ * TODO: Ideally we'd want to use a cmpxchg() to update the req, but FW -+ * interface sections are mapped uncached/write-combined right now, and -+ * using cmpxchg() on such mappings leads to SError faults. Revisit when -+ * we have 'SHARED' GPU mappings hooked up. -+ */ -+ spinlock_t lock; -+ -+ /** -+ * @control: Command stream group slot control interface. -+ * -+ * Used to expose global FW properties. -+ * -+ * This interface is read-only. -+ */ -+ const struct panthor_fw_global_control_iface *control; -+ -+ /** -+ * @input: Global input interface. -+ * -+ * Used for host updates/events. -+ */ -+ struct panthor_fw_global_input_iface *input; -+ -+ /** -+ * @output: Global output interface. -+ * -+ * Used for FW updates/events. -+ * -+ * This interface is read-only. -+ */ -+ const struct panthor_fw_global_output_iface *output; -+}; -+ -+/** -+ * panthor_fw_toggle_reqs() - Toggle acknowledge bits to send an event to the FW -+ * @__iface: The interface to operate on. -+ * @__in_reg: Name of the register to update in the input section of the interface. -+ * @__out_reg: Name of the register to take as a reference in the output section of the -+ * interface. -+ * @__mask: Mask to apply to the update. -+ * -+ * The Host -> FW event/message passing was designed to be lockless, with each side of -+ * the channel having its writeable section. Events are signaled as a difference between -+ * the host and FW side in the req/ack registers (when a bit differs, there's an event -+ * pending, when they are the same, nothing needs attention). -+ * -+ * This helper allows one to update the req register based on the current value of the -+ * ack register managed by the FW. Toggling a specific bit will flag an event. In order -+ * for events to be re-evaluated, the interface doorbell needs to be rung. -+ * -+ * Concurrent accesses to the same req register is covered. -+ * -+ * Anything requiring atomic updates to multiple registers requires a dedicated lock. -+ */ -+#define panthor_fw_toggle_reqs(__iface, __in_reg, __out_reg, __mask) \ -+ do { \ -+ u32 __cur_val, __new_val, __out_val; \ -+ spin_lock(&(__iface)->lock); \ -+ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ -+ __out_val = READ_ONCE((__iface)->output->__out_reg); \ -+ __new_val = ((__out_val ^ (__mask)) & (__mask)) | (__cur_val & ~(__mask)); \ -+ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ -+ spin_unlock(&(__iface)->lock); \ -+ } while (0) -+ -+/** -+ * panthor_fw_update_reqs() - Update bits to reflect a configuration change -+ * @__iface: The interface to operate on. -+ * @__in_reg: Name of the register to update in the input section of the interface. 
-+ * @__val: Value to set. -+ * @__mask: Mask to apply to the update. -+ * -+ * Some configuration get passed through req registers that are also used to -+ * send events to the FW. Those req registers being updated from the interrupt -+ * handler, they require special helpers to update the configuration part as well. -+ * -+ * Concurrent accesses to the same req register is covered. -+ * -+ * Anything requiring atomic updates to multiple registers requires a dedicated lock. -+ */ -+#define panthor_fw_update_reqs(__iface, __in_reg, __val, __mask) \ -+ do { \ -+ u32 __cur_val, __new_val; \ -+ spin_lock(&(__iface)->lock); \ -+ __cur_val = READ_ONCE((__iface)->input->__in_reg); \ -+ __new_val = (__cur_val & ~(__mask)) | ((__val) & (__mask)); \ -+ WRITE_ONCE((__iface)->input->__in_reg, __new_val); \ -+ spin_unlock(&(__iface)->lock); \ -+ } while (0) -+ -+struct panthor_fw_global_iface * -+panthor_fw_get_glb_iface(struct panthor_device *ptdev); -+ -+struct panthor_fw_csg_iface * -+panthor_fw_get_csg_iface(struct panthor_device *ptdev, u32 csg_slot); -+ -+struct panthor_fw_cs_iface * -+panthor_fw_get_cs_iface(struct panthor_device *ptdev, u32 csg_slot, u32 cs_slot); -+ -+int panthor_fw_csg_wait_acks(struct panthor_device *ptdev, u32 csg_id, u32 req_mask, -+ u32 *acked, u32 timeout_ms); -+ -+int panthor_fw_glb_wait_acks(struct panthor_device *ptdev, u32 req_mask, u32 *acked, -+ u32 timeout_ms); -+ -+void panthor_fw_ring_csg_doorbells(struct panthor_device *ptdev, u32 csg_slot); -+ -+struct panthor_kernel_bo * -+panthor_fw_alloc_queue_iface_mem(struct panthor_device *ptdev, -+ struct panthor_fw_ringbuf_input_iface **input, -+ const struct panthor_fw_ringbuf_output_iface **output, -+ u32 *input_fw_va, u32 *output_fw_va); -+struct panthor_kernel_bo * -+panthor_fw_alloc_suspend_buf_mem(struct panthor_device *ptdev, size_t size); -+ -+struct panthor_vm *panthor_fw_vm(struct panthor_device *ptdev); -+ -+void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang); -+int panthor_fw_post_reset(struct panthor_device *ptdev); -+ -+static inline void panthor_fw_suspend(struct panthor_device *ptdev) -+{ -+ panthor_fw_pre_reset(ptdev, false); -+} -+ -+static inline int panthor_fw_resume(struct panthor_device *ptdev) -+{ -+ return panthor_fw_post_reset(ptdev); -+} -+ -+int panthor_fw_init(struct panthor_device *ptdev); -+void panthor_fw_unplug(struct panthor_device *ptdev); -+ -+#endif --- -2.42.0 - - -From 227064358e79c8c5d60d6b03c4ea268fa179c2b3 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:40 +0100 -Subject: [PATCH 59/81] drm/panthor: Add the heap logical block - -Tiler heap growing requires some kernel driver involvement: when the -tiler runs out of heap memory, it will raise an exception which is -either directly handled by the firmware if some free heap chunks are -available in the heap context, or passed back to the kernel otherwise. -The heap helpers will be used by the scheduler logic to allocate more -heap chunks to a heap context, when such a situation happens. - -Heap context creation is explicitly requested by userspace (using -the TILER_HEAP_CREATE ioctl), and the returned context is attached to a -queue through some command stream instruction. - -All the kernel does is keep the list of heap chunks allocated to a -context, so they can be freed when TILER_HEAP_DESTROY is called, or -extended when the FW requests a new chunk. 
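A minimal standalone C sketch (not part of the patch) of the chunk linking described above: when a heap is extended, the value handed to the FW packs the new chunk's GPU VA in the upper bits and its size, in 4 KiB units, in the low 12 bits — the same encoding panthor_alloc_heap_chunk() and panthor_heap_grow() use later in this patch. The helper names below are illustrative only.

#include <stdint.h>

#define CHUNK_VA_MASK    (~0xfffULL)   /* bits [63:12]: 4 KiB-aligned chunk GPU VA */
#define CHUNK_SIZE_MASK  0xfffULL      /* bits [11:0]: chunk size >> 12 */

/* Pack a chunk descriptor the way the FW expects it. */
uint64_t encode_chunk(uint64_t chunk_gpu_va, uint32_t chunk_size)
{
	/* chunk_gpu_va is page-aligned; chunk_size is a power of two in [256K, 2M]. */
	return (chunk_gpu_va & CHUNK_VA_MASK) | (chunk_size >> 12);
}

/* Recover the chunk GPU VA from an encoded descriptor. */
uint64_t decode_chunk_va(uint64_t encoded)
{
	return encoded & CHUNK_VA_MASK;
}

/* Recover the chunk size (in bytes) from an encoded descriptor. */
uint32_t decode_chunk_size(uint64_t encoded)
{
	return (uint32_t)(encoded & CHUNK_SIZE_MASK) << 12;
}

For instance, a 1 MiB chunk at GPU VA 0x800000 encodes to 0x800000 | (0x100000 >> 12) = 0x800100.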
- -v4: -- Rework locking to allow concurrent calls to panthor_heap_grow() -- Add a helper to return a heap chunk if we couldn't pass it to the - FW because the group was scheduled out - -v3: -- Add a FIXME for the heap OOM deadlock -- Use the panthor_kernel_bo abstraction for the heap context and heap - chunks -- Drop the panthor_heap_gpu_ctx struct as it is opaque to the driver -- Ensure that the heap context is aligned to the GPU cache line size -- Minor code tidy ups - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Link: https://lore.kernel.org/r/20240122163047.1954733-10-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_heap.c | 596 +++++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_heap.h | 39 ++ - 2 files changed, 635 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_heap.c - create mode 100644 drivers/gpu/drm/panthor/panthor_heap.h - -diff --git a/drivers/gpu/drm/panthor/panthor_heap.c b/drivers/gpu/drm/panthor/panthor_heap.c -new file mode 100644 -index 000000000000..fac51c24b3e9 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_heap.c -@@ -0,0 +1,596 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2023 Collabora ltd. */ -+ -+#include -+#include -+ -+#include -+ -+#include "panthor_device.h" -+#include "panthor_gem.h" -+#include "panthor_heap.h" -+#include "panthor_mmu.h" -+#include "panthor_regs.h" -+ -+/* -+ * The GPU heap context is an opaque structure used by the GPU to track the -+ * heap allocations. The driver should only touch it to initialize it (zero all -+ * fields). Because the CPU and GPU can both access this structure it is -+ * required to be GPU cache line aligned. -+ */ -+#define HEAP_CONTEXT_SIZE 32 -+ -+/** -+ * struct panthor_heap_chunk_header - Heap chunk header -+ */ -+struct panthor_heap_chunk_header { -+ /** -+ * @next: Next heap chunk in the list. -+ * -+ * This is a GPU VA. -+ */ -+ u64 next; -+ -+ /** @unknown: MBZ. */ -+ u32 unknown[14]; -+}; -+ -+/** -+ * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks. -+ */ -+struct panthor_heap_chunk { -+ /** @node: Used to insert the heap chunk in panthor_heap::chunks. */ -+ struct list_head node; -+ -+ /** @bo: Buffer object backing the heap chunk. */ -+ struct panthor_kernel_bo *bo; -+}; -+ -+/** -+ * struct panthor_heap - Structure used to manage tiler heap contexts. -+ */ -+struct panthor_heap { -+ /** @chunks: List containing all heap chunks allocated so far. */ -+ struct list_head chunks; -+ -+ /** @lock: Lock protecting insertion in the chunks list. */ -+ struct mutex lock; -+ -+ /** @chunk_size: Size of each chunk. */ -+ u32 chunk_size; -+ -+ /** @max_chunks: Maximum number of chunks. */ -+ u32 max_chunks; -+ -+ /** -+ * @target_in_flight: Number of in-flight render passes after which -+ * we'd let the FW wait for fragment job to finish instead of allocating new chunks. -+ */ -+ u32 target_in_flight; -+ -+ /** @chunk_count: Number of heap chunks currently allocated. */ -+ u32 chunk_count; -+}; -+ -+#define MAX_HEAPS_PER_POOL 128 -+ -+/** -+ * struct panthor_heap_pool - Pool of heap contexts -+ * -+ * The pool is attached to a panthor_file and can't be shared across processes. -+ */ -+struct panthor_heap_pool { -+ /** @refcount: Reference count. */ -+ struct kref refcount; -+ -+ /** @ptdev: Device. */ -+ struct panthor_device *ptdev; -+ -+ /** @vm: VM this pool is bound to. 
*/ -+ struct panthor_vm *vm; -+ -+ /** @lock: Lock protecting access to @xa. */ -+ struct rw_semaphore lock; -+ -+ /** @xa: Array storing panthor_heap objects. */ -+ struct xarray xa; -+ -+ /** @gpu_contexts: Buffer object containing the GPU heap contexts. */ -+ struct panthor_kernel_bo *gpu_contexts; -+}; -+ -+static int panthor_heap_ctx_stride(struct panthor_device *ptdev) -+{ -+ u32 l2_features = ptdev->gpu_info.l2_features; -+ u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features); -+ -+ return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size); -+} -+ -+static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id) -+{ -+ return panthor_heap_ctx_stride(pool->ptdev) * id; -+} -+ -+static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id) -+{ -+ return pool->gpu_contexts->kmap + -+ panthor_get_heap_ctx_offset(pool, id); -+} -+ -+static void panthor_free_heap_chunk(struct panthor_vm *vm, -+ struct panthor_heap *heap, -+ struct panthor_heap_chunk *chunk) -+{ -+ mutex_lock(&heap->lock); -+ list_del(&chunk->node); -+ heap->chunk_count--; -+ mutex_unlock(&heap->lock); -+ -+ panthor_kernel_bo_destroy(vm, chunk->bo); -+ kfree(chunk); -+} -+ -+static int panthor_alloc_heap_chunk(struct panthor_device *ptdev, -+ struct panthor_vm *vm, -+ struct panthor_heap *heap, -+ bool initial_chunk) -+{ -+ struct panthor_heap_chunk *chunk; -+ struct panthor_heap_chunk_header *hdr; -+ int ret; -+ -+ chunk = kmalloc(sizeof(*chunk), GFP_KERNEL); -+ if (!chunk) -+ return -ENOMEM; -+ -+ chunk->bo = panthor_kernel_bo_create(ptdev, vm, heap->chunk_size, -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+ if (IS_ERR(chunk->bo)) { -+ ret = PTR_ERR(chunk->bo); -+ goto err_free_chunk; -+ } -+ -+ ret = panthor_kernel_bo_vmap(chunk->bo); -+ if (ret) -+ goto err_destroy_bo; -+ -+ hdr = chunk->bo->kmap; -+ memset(hdr, 0, sizeof(*hdr)); -+ -+ if (initial_chunk && !list_empty(&heap->chunks)) { -+ struct panthor_heap_chunk *prev_chunk; -+ u64 prev_gpuva; -+ -+ prev_chunk = list_first_entry(&heap->chunks, -+ struct panthor_heap_chunk, -+ node); -+ -+ prev_gpuva = panthor_kernel_bo_gpuva(prev_chunk->bo); -+ hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) | -+ (heap->chunk_size >> 12); -+ } -+ -+ panthor_kernel_bo_vunmap(chunk->bo); -+ -+ mutex_lock(&heap->lock); -+ list_add(&chunk->node, &heap->chunks); -+ heap->chunk_count++; -+ mutex_unlock(&heap->lock); -+ -+ return 0; -+ -+err_destroy_bo: -+ panthor_kernel_bo_destroy(vm, chunk->bo); -+ -+err_free_chunk: -+ kfree(chunk); -+ -+ return ret; -+} -+ -+static void panthor_free_heap_chunks(struct panthor_vm *vm, -+ struct panthor_heap *heap) -+{ -+ struct panthor_heap_chunk *chunk, *tmp; -+ -+ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) -+ panthor_free_heap_chunk(vm, heap, chunk); -+} -+ -+static int panthor_alloc_heap_chunks(struct panthor_device *ptdev, -+ struct panthor_vm *vm, -+ struct panthor_heap *heap, -+ u32 chunk_count) -+{ -+ int ret; -+ u32 i; -+ -+ for (i = 0; i < chunk_count; i++) { -+ ret = panthor_alloc_heap_chunk(ptdev, vm, heap, true); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+static int -+panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle) -+{ -+ struct panthor_heap *heap; -+ -+ heap = xa_erase(&pool->xa, handle); -+ if (!heap) -+ return -EINVAL; -+ -+ panthor_free_heap_chunks(pool->vm, heap); -+ mutex_destroy(&heap->lock); -+ kfree(heap); -+ return 0; -+} -+ -+/** -+ * panthor_heap_destroy() - Destroy a heap context -+ * @pool: 
Pool this context belongs to. -+ * @handle: Handle returned by panthor_heap_create(). -+ */ -+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle) -+{ -+ int ret; -+ -+ down_write(&pool->lock); -+ ret = panthor_heap_destroy_locked(pool, handle); -+ up_write(&pool->lock); -+ -+ return ret; -+} -+ -+/** -+ * panthor_heap_create() - Create a heap context -+ * @pool: Pool to instantiate the heap context from. -+ * @initial_chunk_count: Number of chunk allocated at initialization time. -+ * Must be at least 1. -+ * @chunk_size: The size of each chunk. Must be a power of two between 256k -+ * and 2M. -+ * @max_chunks: Maximum number of chunks that can be allocated. -+ * @target_in_flight: Maximum number of in-flight render passes. -+ * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap -+ * context. -+ * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk -+ * assigned to the heap context. -+ * -+ * Return: a positive handle on success, a negative error otherwise. -+ */ -+int panthor_heap_create(struct panthor_heap_pool *pool, -+ u32 initial_chunk_count, -+ u32 chunk_size, -+ u32 max_chunks, -+ u32 target_in_flight, -+ u64 *heap_ctx_gpu_va, -+ u64 *first_chunk_gpu_va) -+{ -+ struct panthor_heap *heap; -+ struct panthor_heap_chunk *first_chunk; -+ struct panthor_vm *vm; -+ int ret = 0; -+ u32 id; -+ -+ if (initial_chunk_count == 0) -+ return -EINVAL; -+ -+ if (hweight32(chunk_size) != 1 || -+ chunk_size < SZ_256K || chunk_size > SZ_2M) -+ return -EINVAL; -+ -+ down_read(&pool->lock); -+ vm = panthor_vm_get(pool->vm); -+ up_read(&pool->lock); -+ -+ /* The pool has been destroyed, we can't create a new heap. */ -+ if (!vm) -+ return -EINVAL; -+ -+ heap = kzalloc(sizeof(*heap), GFP_KERNEL); -+ if (!heap) { -+ ret = -ENOMEM; -+ goto err_put_vm; -+ } -+ -+ mutex_init(&heap->lock); -+ INIT_LIST_HEAD(&heap->chunks); -+ heap->chunk_size = chunk_size; -+ heap->max_chunks = max_chunks; -+ heap->target_in_flight = target_in_flight; -+ -+ ret = panthor_alloc_heap_chunks(pool->ptdev, vm, heap, -+ initial_chunk_count); -+ if (ret) -+ goto err_free_heap; -+ -+ first_chunk = list_first_entry(&heap->chunks, -+ struct panthor_heap_chunk, -+ node); -+ *first_chunk_gpu_va = panthor_kernel_bo_gpuva(first_chunk->bo); -+ -+ down_write(&pool->lock); -+ /* The pool has been destroyed, we can't create a new heap. */ -+ if (!pool->vm) { -+ ret = -EINVAL; -+ } else { -+ ret = xa_alloc(&pool->xa, &id, heap, XA_LIMIT(1, MAX_HEAPS_PER_POOL), GFP_KERNEL); -+ if (!ret) { -+ void *gpu_ctx = panthor_get_heap_ctx(pool, id); -+ -+ memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev)); -+ *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(pool->gpu_contexts) + -+ panthor_get_heap_ctx_offset(pool, id); -+ } -+ } -+ up_write(&pool->lock); -+ -+ if (ret) -+ goto err_free_heap; -+ -+ panthor_vm_put(vm); -+ return id; -+ -+err_free_heap: -+ panthor_free_heap_chunks(pool->vm, heap); -+ mutex_destroy(&heap->lock); -+ kfree(heap); -+ -+err_put_vm: -+ panthor_vm_put(vm); -+ return ret; -+} -+ -+/** -+ * panthor_heap_return_chunk() - Return an unused heap chunk -+ * @pool: The pool this heap belongs to. -+ * @heap_gpu_va: The GPU address of the heap context. -+ * @chunk_gpu_va: The chunk VA to return. -+ * -+ * This function is used when a chunk allocated with panthor_heap_grow() -+ * couldn't be linked to the heap context through the FW interface because -+ * the group requesting the allocation was scheduled out in the meantime. 
-+ */ -+int panthor_heap_return_chunk(struct panthor_heap_pool *pool, -+ u64 heap_gpu_va, -+ u64 chunk_gpu_va) -+{ -+ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); -+ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); -+ struct panthor_heap_chunk *chunk, *tmp, *removed = NULL; -+ struct panthor_heap *heap; -+ int ret; -+ -+ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) -+ return -EINVAL; -+ -+ down_read(&pool->lock); -+ heap = xa_load(&pool->xa, heap_id); -+ if (!heap) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ -+ chunk_gpu_va &= GENMASK_ULL(63, 12); -+ -+ mutex_lock(&heap->lock); -+ list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) { -+ if (panthor_kernel_bo_gpuva(chunk->bo) == chunk_gpu_va) { -+ removed = chunk; -+ list_del(&chunk->node); -+ heap->chunk_count--; -+ break; -+ } -+ } -+ mutex_unlock(&heap->lock); -+ -+ if (removed) { -+ panthor_kernel_bo_destroy(pool->vm, chunk->bo); -+ kfree(chunk); -+ ret = 0; -+ } else { -+ ret = -EINVAL; -+ } -+ -+out_unlock: -+ up_read(&pool->lock); -+ return ret; -+} -+ -+/** -+ * panthor_heap_grow() - Make a heap context grow. -+ * @pool: The pool this heap belongs to. -+ * @heap_gpu_va: The GPU address of the heap context. -+ * @renderpasses_in_flight: Number of render passes currently in-flight. -+ * @pending_frag_count: Number of fragment jobs waiting for execution/completion. -+ * @new_chunk_gpu_va: Pointer used to return the chunk VA. -+ */ -+int panthor_heap_grow(struct panthor_heap_pool *pool, -+ u64 heap_gpu_va, -+ u32 renderpasses_in_flight, -+ u32 pending_frag_count, -+ u64 *new_chunk_gpu_va) -+{ -+ u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(pool->gpu_contexts); -+ u32 heap_id = (u32)offset / panthor_heap_ctx_stride(pool->ptdev); -+ struct panthor_heap_chunk *chunk; -+ struct panthor_heap *heap; -+ int ret; -+ -+ if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL) -+ return -EINVAL; -+ -+ down_read(&pool->lock); -+ heap = xa_load(&pool->xa, heap_id); -+ if (!heap) { -+ ret = -EINVAL; -+ goto out_unlock; -+ } -+ -+ /* If we reached the target in-flight render passes, or if we -+ * reached the maximum number of chunks, let the FW figure another way to -+ * find some memory (wait for render passes to finish, or call the exception -+ * handler provided by the userspace driver, if any). -+ */ -+ if (renderpasses_in_flight > heap->target_in_flight || -+ (pending_frag_count > 0 && heap->chunk_count >= heap->max_chunks)) { -+ ret = -EBUSY; -+ goto out_unlock; -+ } else if (heap->chunk_count >= heap->max_chunks) { -+ ret = -ENOMEM; -+ goto out_unlock; -+ } -+ -+ /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation, which -+ * relies on blocking allocations (both for the BO itself, and backing -+ * memory), which might cause a deadlock because we're called from a context -+ * where we hold the panthor scheduler lock, thus preventing job cleanups -+ * that could free up some memory. The jobs themselves will timeout, but -+ * we'll still be blocked there. The only solution here is to implement -+ * something similar to shmem_sg_alloc_table() in i915, so we can do -+ * non-blocking allocations, and just kill the job when we run out-of-memory -+ * for the tiler context. 
-+ */ -+ ret = panthor_alloc_heap_chunk(pool->ptdev, pool->vm, heap, false); -+ if (ret) -+ goto out_unlock; -+ -+ chunk = list_first_entry(&heap->chunks, -+ struct panthor_heap_chunk, -+ node); -+ *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(chunk->bo) & GENMASK_ULL(63, 12)) | -+ (heap->chunk_size >> 12); -+ ret = 0; -+ -+out_unlock: -+ up_read(&pool->lock); -+ return ret; -+} -+ -+static void panthor_heap_pool_release(struct kref *refcount) -+{ -+ struct panthor_heap_pool *pool = -+ container_of(refcount, struct panthor_heap_pool, refcount); -+ -+ xa_destroy(&pool->xa); -+ kfree(pool); -+} -+ -+/** -+ * panthor_heap_pool_put() - Release a heap pool reference -+ * @pool: Pool to release the reference on. Can be NULL. -+ */ -+void panthor_heap_pool_put(struct panthor_heap_pool *pool) -+{ -+ if (pool) -+ kref_put(&pool->refcount, panthor_heap_pool_release); -+} -+ -+/** -+ * panthor_heap_pool_get() - Get a heap pool reference -+ * @pool: Pool to get the reference on. Can be NULL. -+ * -+ * Return: @pool. -+ */ -+struct panthor_heap_pool * -+panthor_heap_pool_get(struct panthor_heap_pool *pool) -+{ -+ if (pool) -+ kref_get(&pool->refcount); -+ -+ return pool; -+} -+ -+/** -+ * panthor_heap_pool_create() - Create a heap pool -+ * @ptdev: Device. -+ * @vm: The VM this heap pool will be attached to. -+ * -+ * Heap pools might contain up to 128 heap contexts, and are per-VM. -+ * -+ * Return: A valid pointer on success, a negative error code otherwise. -+ */ -+struct panthor_heap_pool * -+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm) -+{ -+ size_t bosize = ALIGN(MAX_HEAPS_PER_POOL * -+ panthor_heap_ctx_stride(ptdev), -+ 4096); -+ struct panthor_heap_pool *pool; -+ int ret = 0; -+ -+ pool = kzalloc(sizeof(*pool), GFP_KERNEL); -+ if (!pool) -+ return ERR_PTR(-ENOMEM); -+ -+ /* We want a weak ref here: the heap pool belongs to the VM, so we're -+ * sure that, as long as the heap pool exists, the VM exists too. -+ */ -+ pool->vm = vm; -+ pool->ptdev = ptdev; -+ init_rwsem(&pool->lock); -+ xa_init_flags(&pool->xa, XA_FLAGS_ALLOC1); -+ kref_init(&pool->refcount); -+ -+ pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, bosize, -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+ if (IS_ERR(pool->gpu_contexts)) { -+ ret = PTR_ERR(pool->gpu_contexts); -+ goto err_destroy_pool; -+ } -+ -+ ret = panthor_kernel_bo_vmap(pool->gpu_contexts); -+ if (ret) -+ goto err_destroy_pool; -+ -+ return pool; -+ -+err_destroy_pool: -+ panthor_heap_pool_destroy(pool); -+ return ERR_PTR(ret); -+} -+ -+/** -+ * panthor_heap_pool_destroy() - Destroy a heap pool. -+ * @pool: Pool to destroy. -+ * -+ * This function destroys all heap contexts and their resources. Thus -+ * preventing any use of the heap context or the chunk attached to them -+ * after that point. -+ * -+ * If the GPU still has access to some heap contexts, a fault should be -+ * triggered, which should flag the command stream groups using these -+ * context as faulty. -+ * -+ * The heap pool object is only released when all references to this pool -+ * are released. 
-+ */ -+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool) -+{ -+ struct panthor_heap *heap; -+ unsigned long i; -+ -+ if (!pool) -+ return; -+ -+ down_write(&pool->lock); -+ xa_for_each(&pool->xa, i, heap) -+ drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i)); -+ -+ if (!IS_ERR_OR_NULL(pool->gpu_contexts)) -+ panthor_kernel_bo_destroy(pool->vm, pool->gpu_contexts); -+ -+ /* Reflects the fact the pool has been destroyed. */ -+ pool->vm = NULL; -+ up_write(&pool->lock); -+ -+ panthor_heap_pool_put(pool); -+} -diff --git a/drivers/gpu/drm/panthor/panthor_heap.h b/drivers/gpu/drm/panthor/panthor_heap.h -new file mode 100644 -index 000000000000..25a5f2bba445 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_heap.h -@@ -0,0 +1,39 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2023 Collabora ltd. */ -+ -+#ifndef __PANTHOR_HEAP_H__ -+#define __PANTHOR_HEAP_H__ -+ -+#include -+ -+struct panthor_device; -+struct panthor_heap_pool; -+struct panthor_vm; -+ -+int panthor_heap_create(struct panthor_heap_pool *pool, -+ u32 initial_chunk_count, -+ u32 chunk_size, -+ u32 max_chunks, -+ u32 target_in_flight, -+ u64 *heap_ctx_gpu_va, -+ u64 *first_chunk_gpu_va); -+int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle); -+ -+struct panthor_heap_pool * -+panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm); -+void panthor_heap_pool_destroy(struct panthor_heap_pool *pool); -+ -+struct panthor_heap_pool * -+panthor_heap_pool_get(struct panthor_heap_pool *pool); -+void panthor_heap_pool_put(struct panthor_heap_pool *pool); -+ -+int panthor_heap_grow(struct panthor_heap_pool *pool, -+ u64 heap_gpu_va, -+ u32 renderpasses_in_flight, -+ u32 pending_frag_count, -+ u64 *new_chunk_gpu_va); -+int panthor_heap_return_chunk(struct panthor_heap_pool *pool, -+ u64 heap_gpu_va, -+ u64 chunk_gpu_va); -+ -+#endif --- -2.42.0 - - -From c89fb08359d67f842bade7b0267806d105423d38 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:41 +0100 -Subject: [PATCH 60/81] drm/panthor: Add the scheduler logical block - -This is the piece of software interacting with the FW scheduler, and -taking care of some scheduling aspects when the FW comes short of slots -scheduling slots. Indeed, the FW only expose a few slots, and the kernel -has to give all submission contexts, a chance to execute their jobs. - -The kernel-side scheduler is timeslice-based, with a round-robin queue -per priority level. - -Job submission is handled with a 1:1 drm_sched_entity:drm_gpu_scheduler, -allowing us to delegate the dependency tracking to the core. - -All the gory details should be documented inline. - -v4: -- Check drmm_mutex_init() return code -- s/drm_gem_vmap_unlocked/drm_gem_vunmap_unlocked/ in - panthor_queue_put_syncwait_obj() -- Drop unneeded WARN_ON() in cs_slot_sync_queue_state_locked() -- Use atomic_xchg() instead of atomic_fetch_and(0) -- Fix typos -- Let panthor_kernel_bo_destroy() check for IS_ERR_OR_NULL() BOs -- Defer TILER_OOM event handling to a separate workqueue to prevent - deadlocks when the heap chunk allocation is blocked on mem-reclaim. 
- This is just a temporary solution, until we add support for - non-blocking/failable allocations -- Pass the scheduler workqueue to drm_sched instead of instantiating - a separate one (no longer needed now that heap chunk allocation - happens on a dedicated wq) -- Set WQ_MEM_RECLAIM on the scheduler workqueue, so we can handle - job timeouts when the system is under mem pressure, and hopefully - free up some memory retained by these jobs - -v3: -- Rework the FW event handling logic to avoid races -- Make sure MMU faults kill the group immediately -- Use the panthor_kernel_bo abstraction for group/queue buffers -- Make in_progress an atomic_t, so we can check it without the reset lock - held -- Don't limit the number of groups per context to the FW scheduler - capacity. Fix the limit to 128 for now. -- Add a panthor_job_vm() helper -- Account for panthor_vm changes -- Add our job fence as DMA_RESV_USAGE_WRITE to all external objects - (was previously DMA_RESV_USAGE_BOOKKEEP). I don't get why, given - we're supposed to be fully-explicit, but other drivers do that, so - there must be a good reason -- Account for drm_sched changes -- Provide a panthor_queue_put_syncwait_obj() -- Unconditionally return groups to their idle list in - panthor_sched_suspend() -- Condition of sched_queue_{,delayed_}work fixed to be only when a reset - isn't pending or in progress. -- Several typos in comments fixed. - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Link: https://lore.kernel.org/r/20240122163047.1954733-11-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_sched.c | 3500 +++++++++++++++++++++++ - drivers/gpu/drm/panthor/panthor_sched.h | 48 + - 2 files changed, 3548 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_sched.c - create mode 100644 drivers/gpu/drm/panthor/panthor_sched.h - -diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c -new file mode 100644 -index 000000000000..b57f4a5f5176 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_sched.c -@@ -0,0 +1,3500 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2023 Collabora ltd. */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "panthor_sched.h" -+#include "panthor_devfreq.h" -+#include "panthor_device.h" -+#include "panthor_gem.h" -+#include "panthor_heap.h" -+#include "panthor_regs.h" -+#include "panthor_gpu.h" -+#include "panthor_fw.h" -+#include "panthor_mmu.h" -+ -+/** -+ * DOC: Scheduler -+ * -+ * Mali CSF hardware adopts a firmware-assisted scheduling model, where -+ * the firmware takes care of scheduling aspects, to some extend. -+ * -+ * The scheduling happens at the scheduling group level, each group -+ * contains 1 to N queues (N is FW/hardware dependent, and exposed -+ * through the firmware interface). Each queue is assigned a command -+ * stream ring buffer, which serves as a way to get jobs submitted to -+ * the GPU, among other things. -+ * -+ * The firmware can schedule a maximum of M groups (M is FW/hardware -+ * dependent, and exposed through the firmware interface). Passed -+ * this maximum number of groups, the kernel must take care of -+ * rotating the groups passed to the firmware so every group gets -+ * a chance to have his queues scheduled for execution. 
-+ * -+ * The current implementation only supports with kernel-mode queues. -+ * In other terms, userspace doesn't have access to the ring-buffer. -+ * Instead, userspace passes indirect command stream buffers that are -+ * called from the queue ring-buffer by the kernel using a pre-defined -+ * sequence of command stream instructions to ensure the userspace driver -+ * always gets consistent results (cache maintenance, -+ * synchronization, ...). -+ * -+ * We rely on the drm_gpu_scheduler framework to deal with job -+ * dependencies and submission. As any other driver dealing with a -+ * FW-scheduler, we use the 1:1 entity:scheduler mode, such that each -+ * entity has its own job scheduler. When a job is ready to be executed -+ * (all its dependencies are met), it is pushed to the appropriate -+ * queue ring-buffer, and the group is scheduled for execution if it -+ * wasn't already active. -+ * -+ * Kernel-side group scheduling is timeslice-based. When we have less -+ * groups than there are slots, the periodic tick is disabled and we -+ * just let the FW schedule the active groups. When there are more -+ * groups than slots, we let each group a chance to execute stuff for -+ * a given amount of time, and then re-evaluate and pick new groups -+ * to schedule. The group selection algorithm is based on -+ * priority+round-robin. -+ * -+ * Even though user-mode queues is out of the scope right now, the -+ * current design takes them into account by avoiding any guess on the -+ * group/queue state that would be based on information we wouldn't have -+ * if userspace was in charge of the ring-buffer. That's also one of the -+ * reason we don't do 'cooperative' scheduling (encoding FW group slot -+ * reservation as dma_fence that would be returned from the -+ * drm_gpu_scheduler::prepare_job() hook, and treating group rotation as -+ * a queue of waiters, ordered by job submission order). This approach -+ * would work for kernel-mode queues, but would make user-mode queues a -+ * lot more complicated to retrofit. -+ */ -+ -+#define JOB_TIMEOUT_MS 5000 -+ -+#define MIN_CS_PER_CSG 8 -+ -+#define MIN_CSGS 3 -+#define MAX_CSG_PRIO 0xf -+ -+struct panthor_group; -+ -+/** -+ * struct panthor_csg_slot - Command stream group slot -+ * -+ * This represents a FW slot for a scheduling group. -+ */ -+struct panthor_csg_slot { -+ /** @group: Scheduling group bound to this slot. */ -+ struct panthor_group *group; -+ -+ /** @priority: Group priority. */ -+ u8 priority; -+ -+ /** -+ * @idle: True if the group bound to this slot is idle. -+ * -+ * A group is idle when it has nothing waiting for execution on -+ * all its queues, or when queues are blocked waiting for something -+ * to happen (synchronization object). -+ */ -+ bool idle; -+}; -+ -+/** -+ * enum panthor_csg_priority - Group priority -+ */ -+enum panthor_csg_priority { -+ /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ -+ PANTHOR_CSG_PRIORITY_LOW = 0, -+ -+ /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ -+ PANTHOR_CSG_PRIORITY_MEDIUM, -+ -+ /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ -+ PANTHOR_CSG_PRIORITY_HIGH, -+ -+ /** -+ * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. -+ * -+ * Real-time priority allows one to preempt scheduling of other -+ * non-real-time groups. When such a group becomes executable, -+ * it will evict the group with the lowest non-rt priority if -+ * there's no free group slot available. -+ * -+ * Currently not exposed to userspace. 
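Aside (not part of the patch): the scheduler overview above describes a timeslice-based, priority + round-robin policy: one runnable list per priority level, consumed from highest to lowest priority until the FW group slots are full, with groups of equal priority taking turns across ticks. A much-simplified standalone model of that selection (array-based, illustrative names; not the driver's actual list handling):

#include <stdio.h>

#define NUM_PRIOS 4	/* mirrors PANTHOR_CSG_PRIORITY_COUNT */
#define NUM_SLOTS 3	/* pretend the FW exposes three group slots */

struct runnable_list {
	int groups[8];	/* group IDs, in round-robin order */
	int count;
};

/* Pick up to NUM_SLOTS groups, highest priority first; rotate each list so
 * groups of equal priority take turns on subsequent ticks. */
static int pick_groups(struct runnable_list *lists, int *picked)
{
	int n = 0;

	for (int prio = NUM_PRIOS - 1; prio >= 0 && n < NUM_SLOTS; prio--) {
		struct runnable_list *l = &lists[prio];
		int avail = l->count < NUM_SLOTS - n ? l->count : NUM_SLOTS - n;

		for (int i = 0; i < avail; i++)
			picked[n++] = l->groups[i];

		/* Rotate: move the groups we just picked to the tail. */
		for (int i = 0; i < avail; i++) {
			int g = l->groups[0];

			for (int j = 0; j < l->count - 1; j++)
				l->groups[j] = l->groups[j + 1];
			l->groups[l->count - 1] = g;
		}
	}
	return n;
}

int main(void)
{
	struct runnable_list lists[NUM_PRIOS] = {
		[1] = { .groups = {10, 11, 12}, .count = 3 },
		[2] = { .groups = {20}, .count = 1 },
	};
	int picked[NUM_SLOTS];
	int n = pick_groups(lists, picked);

	for (int i = 0; i < n; i++)
		printf("tick: schedule group %d\n", picked[i]);
	return 0;
}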
-+ */ -+ PANTHOR_CSG_PRIORITY_RT, -+ -+ /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ -+ PANTHOR_CSG_PRIORITY_COUNT, -+}; -+ -+/** -+ * struct panthor_scheduler - Object used to manage the scheduler -+ */ -+struct panthor_scheduler { -+ /** @ptdev: Device. */ -+ struct panthor_device *ptdev; -+ -+ /** -+ * @wq: Workqueue used by our internal scheduler logic and -+ * drm_gpu_scheduler. -+ * -+ * Used for the scheduler tick, group update or other kind of FW -+ * event processing that can't be handled in the threaded interrupt -+ * path. Also passed to the drm_gpu_scheduler instances embedded -+ * in panthor_queue. -+ */ -+ struct workqueue_struct *wq; -+ -+ /** -+ * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. -+ * -+ * We have a queue dedicated to heap chunk allocation works to avoid -+ * blocking the rest of the scheduler if the allocation tries to -+ * reclaim memory. -+ */ -+ struct workqueue_struct *heap_alloc_wq; -+ -+ /** @tick_work: Work executed on a scheduling tick. */ -+ struct delayed_work tick_work; -+ -+ /** -+ * @sync_upd_work: Work used to process synchronization object updates. -+ * -+ * We use this work to unblock queues/groups that were waiting on a -+ * synchronization object. -+ */ -+ struct work_struct sync_upd_work; -+ -+ /** -+ * @fw_events_work: Work used to process FW events outside the interrupt path. -+ * -+ * Even if the interrupt is threaded, we need any event processing -+ * that require taking the panthor_scheduler::lock to be processed -+ * outside the interrupt path so we don't block the tick logic when -+ * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the -+ * event processing requires taking this lock, we just delegate all -+ * FW event processing to the scheduler workqueue. -+ */ -+ struct work_struct fw_events_work; -+ -+ /** -+ * @fw_events: Bitmask encoding pending FW events. -+ */ -+ atomic_t fw_events; -+ -+ /** -+ * @resched_target: When the next tick should occur. -+ * -+ * Expressed in jiffies. -+ */ -+ u64 resched_target; -+ -+ /** -+ * @last_tick: When the last tick occurred. -+ * -+ * Expressed in jiffies. -+ */ -+ u64 last_tick; -+ -+ /** @tick_period: Tick period in jiffies. */ -+ u64 tick_period; -+ -+ /** -+ * @lock: Lock protecting access to all the scheduler fields. -+ * -+ * Should be taken in the tick work, the irq handler, and anywhere the @groups -+ * fields are touched. -+ */ -+ struct mutex lock; -+ -+ /** @groups: Various lists used to classify groups. */ -+ struct { -+ /** -+ * @runnable: Runnable group lists. -+ * -+ * When a group has queues that want to execute something, -+ * its panthor_group::run_node should be inserted here. -+ * -+ * One list per-priority. -+ */ -+ struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; -+ -+ /** -+ * @idle: Idle group lists. -+ * -+ * When all queues of a group are idle (either because they -+ * have nothing to execute, or because they are blocked), the -+ * panthor_group::run_node field should be inserted here. -+ * -+ * One list per-priority. -+ */ -+ struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; -+ -+ /** -+ * @waiting: List of groups whose queues are blocked on a -+ * synchronization object. -+ * -+ * Insert panthor_group::wait_node here when a group is waiting -+ * for synchronization objects to be signaled. -+ * -+ * This list is evaluated in the @sync_upd_work work. -+ */ -+ struct list_head waiting; -+ } groups; -+ -+ /** -+ * @csg_slots: FW command stream group slots. 
-+ */ -+ struct panthor_csg_slot csg_slots[MAX_CSGS]; -+ -+ /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ -+ u32 csg_slot_count; -+ -+ /** @cs_slot_count: Number of command stream slot per group slot exposed by the FW. */ -+ u32 cs_slot_count; -+ -+ /** @as_slot_count: Number of address space slots supported by the MMU. */ -+ u32 as_slot_count; -+ -+ /** @used_csg_slot_count: Number of command stream group slot currently used. */ -+ u32 used_csg_slot_count; -+ -+ /** @sb_slot_count: Number of scoreboard slots. */ -+ u32 sb_slot_count; -+ -+ /** -+ * @might_have_idle_groups: True if an active group might have become idle. -+ * -+ * This will force a tick, so other runnable groups can be scheduled if one -+ * or more active groups became idle. -+ */ -+ bool might_have_idle_groups; -+ -+ /** @pm: Power management related fields. */ -+ struct { -+ /** @has_ref: True if the scheduler owns a runtime PM reference. */ -+ bool has_ref; -+ } pm; -+ -+ /** @reset: Reset related fields. */ -+ struct { -+ /** @lock: Lock protecting the other reset fields. */ -+ struct mutex lock; -+ -+ /** -+ * @in_progress: True if a reset is in progress. -+ * -+ * Set to true in panthor_sched_pre_reset() and back to false in -+ * panthor_sched_post_reset(). -+ */ -+ atomic_t in_progress; -+ -+ /** -+ * @stopped_groups: List containing all groups that were stopped -+ * before a reset. -+ * -+ * Insert panthor_group::run_node in the pre_reset path. -+ */ -+ struct list_head stopped_groups; -+ } reset; -+}; -+ -+/** -+ * struct panthor_syncobj_32b - 32-bit FW synchronization object -+ */ -+struct panthor_syncobj_32b { -+ /** @seqno: Sequence number. */ -+ u32 seqno; -+ -+ /** -+ * @status: Status. -+ * -+ * Not zero on failure. -+ */ -+ u32 status; -+}; -+ -+/** -+ * struct panthor_syncobj_64b - 64-bit FW synchronization object -+ */ -+struct panthor_syncobj_64b { -+ /** @seqno: Sequence number. */ -+ u64 seqno; -+ -+ /** -+ * @status: Status. -+ * -+ * Not zero on failure. -+ */ -+ u32 status; -+ -+ /** @pad: MBZ. */ -+ u32 pad; -+}; -+ -+/** -+ * struct panthor_queue - Execution queue -+ */ -+struct panthor_queue { -+ /** @scheduler: DRM scheduler used for this queue. */ -+ struct drm_gpu_scheduler scheduler; -+ -+ /** @entity: DRM scheduling entity used for this queue. */ -+ struct drm_sched_entity entity; -+ -+ /** -+ * @remaining_time: Time remaining before the job timeout expires. -+ * -+ * The job timeout is suspended when the queue is not scheduled by the -+ * FW. Every time we suspend the timer, we need to save the remaining -+ * time so we can restore it later on. -+ */ -+ unsigned long remaining_time; -+ -+ /** @timeout_suspended: True if the job timeout was suspended. */ -+ bool timeout_suspended; -+ -+ /** -+ * @doorbell_id: Doorbell assigned to this queue. -+ * -+ * Right now, all groups share the same doorbell, and the doorbell ID -+ * is assigned to group_slot + 1 when the group is assigned a slot. But -+ * we might decide to provide fine grained doorbell assignment at some -+ * point, so don't have to wake up all queues in a group every time one -+ * of them is updated. -+ */ -+ u8 doorbell_id; -+ -+ /** -+ * @priority: Priority of the queue inside the group. -+ * -+ * Must be less than 16 (Only 4 bits available). -+ */ -+ u8 priority; -+#define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) -+ -+ /** @ringbuf: Command stream ring-buffer. */ -+ struct panthor_kernel_bo *ringbuf; -+ -+ /** @iface: Firmware interface. */ -+ struct { -+ /** @mem: FW memory allocated for this interface. 
*/ -+ struct panthor_kernel_bo *mem; -+ -+ /** @input: Input interface. */ -+ struct panthor_fw_ringbuf_input_iface *input; -+ -+ /** @output: Output interface. */ -+ const struct panthor_fw_ringbuf_output_iface *output; -+ -+ /** @input_fw_va: FW virtual address of the input interface buffer. */ -+ u32 input_fw_va; -+ -+ /** @output_fw_va: FW virtual address of the output interface buffer. */ -+ u32 output_fw_va; -+ } iface; -+ -+ /** -+ * @syncwait: Stores information about the synchronization object this -+ * queue is waiting on. -+ */ -+ struct { -+ /** @gpu_va: GPU address of the synchronization object. */ -+ u64 gpu_va; -+ -+ /** @ref: Reference value to compare against. */ -+ u64 ref; -+ -+ /** @gt: True if this is a greater-than test. */ -+ bool gt; -+ -+ /** @sync64: True if this is a 64-bit sync object. */ -+ bool sync64; -+ -+ /** @bo: Buffer object holding the synchronization object. */ -+ struct drm_gem_object *obj; -+ -+ /** @offset: Offset of the synchronization object inside @bo. */ -+ u64 offset; -+ -+ /** -+ * @kmap: Kernel mapping of the buffer object holding the -+ * synchronization object. -+ */ -+ void *kmap; -+ } syncwait; -+ -+ /** @fence_ctx: Fence context fields. */ -+ struct { -+ /** @lock: Used to protect access to all fences allocated by this context. */ -+ spinlock_t lock; -+ -+ /** -+ * @id: Fence context ID. -+ * -+ * Allocated with dma_fence_context_alloc(). -+ */ -+ u64 id; -+ -+ /** @seqno: Sequence number of the last initialized fence. */ -+ atomic64_t seqno; -+ -+ /** -+ * @in_flight_jobs: List containing all in-flight jobs. -+ * -+ * Used to keep track and signal panthor_job::done_fence when the -+ * synchronization object attached to the queue is signaled. -+ */ -+ struct list_head in_flight_jobs; -+ } fence_ctx; -+}; -+ -+/** -+ * enum panthor_group_state - Scheduling group state. -+ */ -+enum panthor_group_state { -+ /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */ -+ PANTHOR_CS_GROUP_CREATED, -+ -+ /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */ -+ PANTHOR_CS_GROUP_ACTIVE, -+ -+ /** -+ * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is -+ * inactive/suspended right now. -+ */ -+ PANTHOR_CS_GROUP_SUSPENDED, -+ -+ /** -+ * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated. -+ * -+ * Can no longer be scheduled. The only allowed action is a destruction. -+ */ -+ PANTHOR_CS_GROUP_TERMINATED, -+}; -+ -+/** -+ * struct panthor_group - Scheduling group object -+ */ -+struct panthor_group { -+ /** @refcount: Reference count */ -+ struct kref refcount; -+ -+ /** @ptdev: Device. */ -+ struct panthor_device *ptdev; -+ -+ /** @vm: VM bound to the group. */ -+ struct panthor_vm *vm; -+ -+ /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */ -+ u64 compute_core_mask; -+ -+ /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */ -+ u64 fragment_core_mask; -+ -+ /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */ -+ u64 tiler_core_mask; -+ -+ /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ -+ u8 max_compute_cores; -+ -+ /** @max_compute_cores: Maximum number of shader cores used for fragment jobs. */ -+ u8 max_fragment_cores; -+ -+ /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ -+ u8 max_tiler_cores; -+ -+ /** @priority: Group priority (check panthor_csg_priority). */ -+ u8 priority; -+ -+ /** @blocked_queues: Bitmask reflecting the blocked queues. 
*/ -+ u32 blocked_queues; -+ -+ /** @idle_queues: Bitmask reflecting the blocked queues. */ -+ u32 idle_queues; -+ -+ /** @fatal_lock: Lock used to protect access to fatal fields. */ -+ spinlock_t fatal_lock; -+ -+ /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */ -+ u32 fatal_queues; -+ -+ /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */ -+ atomic_t tiler_oom; -+ -+ /** @queue_count: Number of queues in this group. */ -+ u32 queue_count; -+ -+ /** @queues: Queues owned by this group. */ -+ struct panthor_queue *queues[MAX_CS_PER_CSG]; -+ -+ /** -+ * @csg_id: ID of the FW group slot. -+ * -+ * -1 when the group is not scheduled/active. -+ */ -+ int csg_id; -+ -+ /** -+ * @destroyed: True when the group has been destroyed. -+ * -+ * If a group is destroyed it becomes useless: no further jobs can be submitted -+ * to its queues. We simply wait for all references to be dropped so we can -+ * release the group object. -+ */ -+ bool destroyed; -+ -+ /** -+ * @timedout: True when a timeout occurred on any of the queues owned by -+ * this group. -+ * -+ * Timeouts can be reported by drm_sched or by the FW. In any case, any -+ * timeout situation is unrecoverable, and the group becomes useless. -+ * We simply wait for all references to be dropped so we can release the -+ * group object. -+ */ -+ bool timedout; -+ -+ /** -+ * @syncobjs: Pool of per-queue synchronization objects. -+ * -+ * One sync object per queue. The position of the sync object is -+ * determined by the queue index. -+ */ -+ struct panthor_kernel_bo *syncobjs; -+ -+ /** @state: Group state. */ -+ enum panthor_group_state state; -+ -+ /** -+ * @suspend_buf: Suspend buffer. -+ * -+ * Stores the state of the group and its queues when a group is suspended. -+ * Used at resume time to restore the group in its previous state. -+ * -+ * The size of the suspend buffer is exposed through the FW interface. -+ */ -+ struct panthor_kernel_bo *suspend_buf; -+ -+ /** -+ * @protm_suspend_buf: Protection mode suspend buffer. -+ * -+ * Stores the state of the group and its queues when a group that's in -+ * protection mode is suspended. -+ * -+ * Used at resume time to restore the group in its previous state. -+ * -+ * The size of the protection mode suspend buffer is exposed through the -+ * FW interface. -+ */ -+ struct panthor_kernel_bo *protm_suspend_buf; -+ -+ /** @sync_upd_work: Work used to check/signal job fences. */ -+ struct work_struct sync_upd_work; -+ -+ /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ -+ struct work_struct tiler_oom_work; -+ -+ /** @term_work: Work used to finish the group termination procedure. */ -+ struct work_struct term_work; -+ -+ /** -+ * @release_work: Work used to release group resources. -+ * -+ * We need to postpone the group release to avoid a deadlock when -+ * the last ref is released in the tick work. -+ */ -+ struct work_struct release_work; -+ -+ /** -+ * @run_node: Node used to insert the group in the -+ * panthor_group::groups::{runnable,idle} and -+ * panthor_group::reset.stopped_groups lists. -+ */ -+ struct list_head run_node; -+ -+ /** -+ * @wait_node: Node used to insert the group in the -+ * panthor_group::groups::waiting list. -+ */ -+ struct list_head wait_node; -+}; -+ -+/** -+ * group_queue_work() - Queue a group work -+ * @group: Group to queue the work for. -+ * @wname: Work name. -+ * -+ * Grabs a ref and queue a work item to the scheduler workqueue. 
If -+ * the work was already queued, we release the reference we grabbed. -+ * -+ * Work callbacks must release the reference we grabbed here. -+ */ -+#define group_queue_work(group, wname) \ -+ do { \ -+ group_get(group); \ -+ if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ -+ group_put(group); \ -+ } while (0) -+ -+/** -+ * sched_queue_work() - Queue a scheduler work. -+ * @sched: Scheduler object. -+ * @wname: Work name. -+ * -+ * Conditionally queues a scheduler work if no reset is pending/in-progress. -+ */ -+#define sched_queue_work(sched, wname) \ -+ do { \ -+ if (!atomic_read(&sched->reset.in_progress) && \ -+ !panthor_device_reset_is_pending((sched)->ptdev)) \ -+ queue_work((sched)->wq, &(sched)->wname ## _work); \ -+ } while (0) -+ -+/** -+ * sched_queue_delayed_work() - Queue a scheduler delayed work. -+ * @sched: Scheduler object. -+ * @wname: Work name. -+ * @delay: Work delay in jiffies. -+ * -+ * Conditionally queues a scheduler delayed work if no reset is -+ * pending/in-progress. -+ */ -+#define sched_queue_delayed_work(sched, wname, delay) \ -+ do { \ -+ if (!atomic_read(&sched->reset.in_progress) && \ -+ !panthor_device_reset_is_pending((sched)->ptdev)) \ -+ mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ -+ } while (0) -+ -+/* -+ * We currently set the maximum of groups per file to an arbitrary low value. -+ * But this can be updated if we need more. -+ */ -+#define MAX_GROUPS_PER_POOL 128 -+ -+/** -+ * struct panthor_group_pool - Group pool -+ * -+ * Each file get assigned a group pool. -+ */ -+struct panthor_group_pool { -+ /** @xa: Xarray used to manage group handles. */ -+ struct xarray xa; -+}; -+ -+/** -+ * struct panthor_job - Used to manage GPU job -+ */ -+struct panthor_job { -+ /** @base: Inherit from drm_sched_job. */ -+ struct drm_sched_job base; -+ -+ /** @refcount: Reference count. */ -+ struct kref refcount; -+ -+ /** @group: Group of the queue this job will be pushed to. */ -+ struct panthor_group *group; -+ -+ /** @queue_idx: Index of the queue inside @group. */ -+ u32 queue_idx; -+ -+ /** @call_info: Information about the userspace command stream call. */ -+ struct { -+ /** @start: GPU address of the userspace command stream. */ -+ u64 start; -+ -+ /** @size: Size of the userspace command stream. */ -+ u32 size; -+ -+ /** -+ * @latest_flush: Flush ID at the time the userspace command -+ * stream was built. -+ * -+ * Needed for the flush reduction mechanism. -+ */ -+ u32 latest_flush; -+ } call_info; -+ -+ /** @ringbuf: Position of this job is in the ring buffer. */ -+ struct { -+ /** @start: Start offset. */ -+ u64 start; -+ -+ /** @end: End offset. */ -+ u64 end; -+ } ringbuf; -+ -+ /** -+ * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs -+ * list. -+ */ -+ struct list_head node; -+ -+ /** @done_fence: Fence signaled when the job is finished or cancelled. 
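Aside (not part of the patch): the group_queue_work() convention above is a reference-per-pending-work pattern: take a reference before queueing, give it back immediately if the work was already pending, and let the work handler drop it once it has run. A standalone sketch of that pattern using C11 atomics (the workqueue is faked with a pending flag; names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct object {
	atomic_int refcount;
	atomic_bool work_pending;	/* stand-in for a real workqueue */
};

static void obj_get(struct object *obj)
{
	atomic_fetch_add(&obj->refcount, 1);
}

static void obj_put(struct object *obj)
{
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		printf("last reference dropped, object can be freed\n");
}

/* Returns false if the work was already pending, mirroring queue_work(). */
static bool fake_queue_work(struct object *obj)
{
	return !atomic_exchange(&obj->work_pending, true);
}

static void queue_obj_work(struct object *obj)
{
	obj_get(obj);			/* the pending work owns a reference */
	if (!fake_queue_work(obj))
		obj_put(obj);		/* already queued: give the reference back */
}

static void work_handler(struct object *obj)
{
	atomic_store(&obj->work_pending, false);
	/* ... actual processing would go here ... */
	obj_put(obj);			/* release the reference taken at queue time */
}

int main(void)
{
	struct object obj = { .refcount = 1, .work_pending = false };

	queue_obj_work(&obj);	/* queued, refcount 1 -> 2 */
	queue_obj_work(&obj);	/* already pending, refcount stays at 2 */
	work_handler(&obj);	/* refcount back to 1 */
	obj_put(&obj);		/* drop the caller's reference */
	return 0;
}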
*/ -+ struct dma_fence *done_fence; -+}; -+ -+static void -+panthor_queue_put_syncwait_obj(struct panthor_queue *queue) -+{ -+ if (queue->syncwait.kmap) { -+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); -+ -+ drm_gem_vunmap_unlocked(queue->syncwait.obj, &map); -+ queue->syncwait.kmap = NULL; -+ } -+ -+ drm_gem_object_put(queue->syncwait.obj); -+ queue->syncwait.obj = NULL; -+} -+ -+static void * -+panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_gem_object *bo; -+ struct iosys_map map; -+ int ret; -+ -+ if (queue->syncwait.kmap) -+ return queue->syncwait.kmap + queue->syncwait.offset; -+ -+ bo = panthor_vm_get_bo_for_va(group->vm, -+ queue->syncwait.gpu_va, -+ &queue->syncwait.offset); -+ if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo))) -+ goto err_put_syncwait_obj; -+ -+ queue->syncwait.obj = &bo->base.base; -+ ret = drm_gem_vmap_unlocked(queue->syncwait.obj, &map); -+ if (drm_WARN_ON(&ptdev->base, ret)) -+ goto err_put_syncwait_obj; -+ -+ queue->syncwait.kmap = map.vaddr; -+ if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) -+ goto err_put_syncwait_obj; -+ -+ return queue->syncwait.kmap + queue->syncwait.offset; -+ -+err_put_syncwait_obj: -+ panthor_queue_put_syncwait_obj(queue); -+ return NULL; -+} -+ -+static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) -+{ -+ if (IS_ERR_OR_NULL(queue)) -+ return; -+ -+ if (queue->entity.fence_context) -+ drm_sched_entity_destroy(&queue->entity); -+ -+ if (queue->scheduler.ops) -+ drm_sched_fini(&queue->scheduler); -+ -+ panthor_queue_put_syncwait_obj(queue); -+ -+ panthor_kernel_bo_destroy(group->vm, queue->ringbuf); -+ panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem); -+ -+ kfree(queue); -+} -+ -+static void group_release_work(struct work_struct *work) -+{ -+ struct panthor_group *group = container_of(work, -+ struct panthor_group, -+ release_work); -+ struct panthor_device *ptdev = group->ptdev; -+ u32 i; -+ -+ for (i = 0; i < group->queue_count; i++) -+ group_free_queue(group, group->queues[i]); -+ -+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf); -+ panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf); -+ -+ if (!IS_ERR_OR_NULL(group->syncobjs)) -+ panthor_kernel_bo_destroy(group->vm, group->syncobjs); -+ -+ panthor_vm_put(group->vm); -+ kfree(group); -+} -+ -+static void group_release(struct kref *kref) -+{ -+ struct panthor_group *group = container_of(kref, -+ struct panthor_group, -+ refcount); -+ struct panthor_device *ptdev = group->ptdev; -+ -+ drm_WARN_ON(&ptdev->base, group->csg_id >= 0); -+ drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); -+ drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); -+ -+ queue_work(panthor_cleanup_wq, &group->release_work); -+} -+ -+static void group_put(struct panthor_group *group) -+{ -+ if (group) -+ kref_put(&group->refcount, group_release); -+} -+ -+static struct panthor_group * -+group_get(struct panthor_group *group) -+{ -+ if (group) -+ kref_get(&group->refcount); -+ -+ return group; -+} -+ -+/** -+ * group_bind_locked() - Bind a group to a group slot -+ * @group: Group. -+ * @csg_id: Slot. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+static int -+group_bind_locked(struct panthor_group *group, u32 csg_id) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_csg_slot *csg_slot; -+ int ret; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || -+ ptdev->scheduler->csg_slots[csg_id].group)) -+ return -EINVAL; -+ -+ ret = panthor_vm_active(group->vm); -+ if (ret) -+ return ret; -+ -+ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ group_get(group); -+ group->csg_id = csg_id; -+ -+ /* Dummy doorbell allocation: doorbell is assigned to the group and -+ * all queues use the same doorbell. -+ * -+ * TODO: Implement LRU-based doorbell assignment, so the most often -+ * updated queues get their own doorbell, thus avoiding useless checks -+ * on queues belonging to the same group that are rarely updated. -+ */ -+ for (u32 i = 0; i < group->queue_count; i++) -+ group->queues[i]->doorbell_id = csg_id + 1; -+ -+ csg_slot->group = group; -+ -+ return 0; -+} -+ -+/** -+ * group_unbind_locked() - Unbind a group from a slot. -+ * @group: Group to unbind. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+group_unbind_locked(struct panthor_group *group) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_csg_slot *slot; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) -+ return -EINVAL; -+ -+ if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) -+ return -EINVAL; -+ -+ slot = &ptdev->scheduler->csg_slots[group->csg_id]; -+ panthor_vm_idle(group->vm); -+ group->csg_id = -1; -+ -+ /* Tiler OOM events will be re-issued next time the group is scheduled. */ -+ atomic_set(&group->tiler_oom, 0); -+ cancel_work(&group->tiler_oom_work); -+ -+ for (u32 i = 0; i < group->queue_count; i++) -+ group->queues[i]->doorbell_id = -1; -+ -+ slot->group = NULL; -+ -+ group_put(group); -+ return 0; -+} -+ -+/** -+ * cs_slot_prog_locked() - Program a queue slot -+ * @ptdev: Device. -+ * @csg_id: Group slot ID. -+ * @cs_id: Queue slot ID. -+ * -+ * Program a queue slot with the queue information so things can start being -+ * executed on this queue. -+ * -+ * The group slot must have a group bound to it already (group_bind_locked()). 
-+ */ -+static void -+cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) -+{ -+ struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id]; -+ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ queue->iface.input->extract = queue->iface.output->extract; -+ drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract); -+ -+ cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf); -+ cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); -+ cs_iface->input->ringbuf_input = queue->iface.input_fw_va; -+ cs_iface->input->ringbuf_output = queue->iface.output_fw_va; -+ cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) | -+ CS_CONFIG_DOORBELL(queue->doorbell_id); -+ cs_iface->input->ack_irq_mask = ~0; -+ panthor_fw_update_reqs(cs_iface, req, -+ CS_IDLE_SYNC_WAIT | -+ CS_IDLE_EMPTY | -+ CS_STATE_START | -+ CS_EXTRACT_EVENT, -+ CS_IDLE_SYNC_WAIT | -+ CS_IDLE_EMPTY | -+ CS_STATE_MASK | -+ CS_EXTRACT_EVENT); -+ if (queue->iface.input->insert != queue->iface.input->extract && queue->timeout_suspended) { -+ drm_sched_resume_timeout(&queue->scheduler, queue->remaining_time); -+ queue->timeout_suspended = false; -+ } -+} -+ -+/** -+ * @cs_slot_reset_locked() - Reset a queue slot -+ * @ptdev: Device. -+ * @csg_id: Group slot. -+ * @cs_id: Queue slot. -+ * -+ * Change the queue slot state to STOP and suspend the queue timeout if -+ * the queue is not blocked. -+ * -+ * The group slot must have a group bound to it (group_bind_locked()). -+ */ -+static int -+cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) -+{ -+ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; -+ struct panthor_queue *queue = group->queues[cs_id]; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ panthor_fw_update_reqs(cs_iface, req, -+ CS_STATE_STOP, -+ CS_STATE_MASK); -+ -+ /* If the queue is blocked, we want to keep the timeout running, so -+ * we can detect unbounded waits and kill the group when that happens. -+ */ -+ if (!(group->blocked_queues & BIT(cs_id)) && !queue->timeout_suspended) { -+ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); -+ queue->timeout_suspended = true; -+ WARN_ON(queue->remaining_time > msecs_to_jiffies(JOB_TIMEOUT_MS)); -+ } -+ -+ return 0; -+} -+ -+/** -+ * csg_slot_sync_priority_locked() - Synchronize the group slot priority -+ * @ptdev: Device. -+ * @csg_id: Group slot ID. -+ * -+ * Group slot priority update happens asynchronously. When we receive a -+ * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can -+ * reflect it to our panthor_csg_slot object. -+ */ -+static void -+csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ struct panthor_fw_csg_iface *csg_iface; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ csg_slot->priority = (csg_iface->input->endpoint_req & CSG_EP_REQ_PRIORITY_MASK) >> 28; -+} -+ -+/** -+ * cs_slot_sync_queue_state_locked() - Synchronize the queue slot priority -+ * @ptdev: Device. -+ * @csg_id: Group slot. -+ * @cs_id: Queue slot. -+ * -+ * Queue state is updated on group suspend or STATUS_UPDATE event. 
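Aside (not part of the patch): csg_slot_sync_priority_locked() above recovers the slot priority by masking the endpoint request word and shifting right by 28, i.e. the priority lives in the top nibble. A tiny pack/extract sketch of that field (only the priority position is taken from the code above; everything else is illustrative):

#include <assert.h>
#include <stdint.h>

#define EP_REQ_PRIORITY_SHIFT 28
#define EP_REQ_PRIORITY_MASK  (0xfu << EP_REQ_PRIORITY_SHIFT)	/* bits 31:28 */

static uint32_t ep_req_set_priority(uint32_t ep_req, uint32_t prio)
{
	return (ep_req & ~EP_REQ_PRIORITY_MASK) |
	       ((prio << EP_REQ_PRIORITY_SHIFT) & EP_REQ_PRIORITY_MASK);
}

static uint32_t ep_req_get_priority(uint32_t ep_req)
{
	return (ep_req & EP_REQ_PRIORITY_MASK) >> EP_REQ_PRIORITY_SHIFT;
}

int main(void)
{
	uint32_t ep_req = ep_req_set_priority(0x00001234, 0xf);	/* MAX_CSG_PRIO */

	assert(ep_req_get_priority(ep_req) == 0xf);
	assert((ep_req & 0xffff) == 0x1234);	/* other fields untouched */
	return 0;
}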
-+ */ -+static void -+cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) -+{ -+ struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; -+ struct panthor_queue *queue = group->queues[cs_id]; -+ struct panthor_fw_cs_iface *cs_iface = -+ panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id); -+ -+ u32 status_wait_cond; -+ -+ switch (cs_iface->output->status_blocked_reason) { -+ case CS_STATUS_BLOCKED_REASON_UNBLOCKED: -+ if (queue->iface.input->insert == queue->iface.output->extract && -+ cs_iface->output->status_scoreboards == 0) -+ group->idle_queues |= BIT(cs_id); -+ break; -+ -+ case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: -+ list_move_tail(&group->wait_node, &group->ptdev->scheduler->groups.waiting); -+ group->blocked_queues |= BIT(cs_id); -+ queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; -+ queue->syncwait.ref = cs_iface->output->status_wait_sync_value; -+ status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; -+ queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; -+ if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { -+ u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; -+ -+ queue->syncwait.sync64 = true; -+ queue->syncwait.ref |= sync_val_hi << 32; -+ } else { -+ queue->syncwait.sync64 = false; -+ } -+ break; -+ -+ default: -+ /* Other reasons are not blocking. Consider the queue as runnable -+ * in those cases. -+ */ -+ break; -+ } -+} -+ -+static void -+csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ u32 i; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ group->idle_queues = 0; -+ group->blocked_queues = 0; -+ -+ for (i = 0; i < group->queue_count; i++) { -+ if (group->queues[i]) -+ cs_slot_sync_queue_state_locked(ptdev, csg_id, i); -+ } -+} -+ -+static void -+csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ struct panthor_fw_csg_iface *csg_iface; -+ struct panthor_group *group; -+ enum panthor_group_state new_state, old_state; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ group = csg_slot->group; -+ -+ if (!group) -+ return; -+ -+ old_state = group->state; -+ switch (csg_iface->output->ack & CSG_STATE_MASK) { -+ case CSG_STATE_START: -+ case CSG_STATE_RESUME: -+ new_state = PANTHOR_CS_GROUP_ACTIVE; -+ break; -+ case CSG_STATE_TERMINATE: -+ new_state = PANTHOR_CS_GROUP_TERMINATED; -+ break; -+ case CSG_STATE_SUSPEND: -+ new_state = PANTHOR_CS_GROUP_SUSPENDED; -+ break; -+ } -+ -+ if (old_state == new_state) -+ return; -+ -+ if (new_state == PANTHOR_CS_GROUP_SUSPENDED) -+ csg_slot_sync_queues_state_locked(ptdev, csg_id); -+ -+ if (old_state == PANTHOR_CS_GROUP_ACTIVE) { -+ u32 i; -+ -+ /* Reset the queue slots so we start from a clean -+ * state when starting/resuming a new group on this -+ * CSG slot. No wait needed here, and no ringbell -+ * either, since the CS slot will only be re-used -+ * on the next CSG start operation. 
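Aside (not part of the patch): the SYNC_WAIT handling above reassembles a 64-bit wait reference from two 32-bit status words and records whether the wait is a greater-than test. A standalone sketch of that reassembly, plus one plausible way the condition could then be evaluated (the less-or-equal fallback here is an assumption, not taken from the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct syncwait {
	uint64_t ref;	/* value the queue is waiting for */
	bool gt;	/* true: wait until seqno > ref, else (assumed) seqno <= ref */
	bool sync64;	/* 64-bit vs 32-bit synchronization object */
};

static void syncwait_parse(struct syncwait *w, uint32_t lo, uint32_t hi,
			   bool is_gt, bool is_64b)
{
	w->ref = lo;
	w->sync64 = is_64b;
	if (is_64b)
		w->ref |= (uint64_t)hi << 32;	/* same shape as ref |= sync_val_hi << 32 */
	w->gt = is_gt;
}

static bool syncwait_signaled(const struct syncwait *w, uint64_t seqno)
{
	if (!w->sync64)
		seqno = (uint32_t)seqno;	/* 32-bit objects only carry 32 bits */
	return w->gt ? seqno > w->ref : seqno <= w->ref;
}

int main(void)
{
	struct syncwait w;

	syncwait_parse(&w, 0x10, 0x1, true, true);	/* wait for seqno > 0x1_00000010 */
	printf("signaled: %d\n", syncwait_signaled(&w, 0x100000011ULL));
	return 0;
}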
-+ */ -+ for (i = 0; i < group->queue_count; i++) { -+ if (group->queues[i]) -+ cs_slot_reset_locked(ptdev, csg_id, i); -+ } -+ } -+ -+ group->state = new_state; -+} -+ -+static int -+csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) -+{ -+ struct panthor_fw_csg_iface *csg_iface; -+ struct panthor_csg_slot *csg_slot; -+ struct panthor_group *group; -+ u32 queue_mask = 0, i; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ if (priority > MAX_CSG_PRIO) -+ return -EINVAL; -+ -+ if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) -+ return -EINVAL; -+ -+ csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ group = csg_slot->group; -+ if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) -+ return 0; -+ -+ csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id); -+ -+ for (i = 0; i < group->queue_count; i++) { -+ if (group->queues[i]) { -+ cs_slot_prog_locked(ptdev, csg_id, i); -+ queue_mask |= BIT(i); -+ } -+ } -+ -+ csg_iface->input->allow_compute = group->compute_core_mask; -+ csg_iface->input->allow_fragment = group->fragment_core_mask; -+ csg_iface->input->allow_other = group->tiler_core_mask; -+ csg_iface->input->endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | -+ CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | -+ CSG_EP_REQ_TILER(group->max_tiler_cores) | -+ CSG_EP_REQ_PRIORITY(priority); -+ csg_iface->input->config = panthor_vm_as(group->vm); -+ -+ if (group->suspend_buf) -+ csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf); -+ else -+ csg_iface->input->suspend_buf = 0; -+ -+ if (group->protm_suspend_buf) { -+ csg_iface->input->protm_suspend_buf = -+ panthor_kernel_bo_gpuva(group->protm_suspend_buf); -+ } else { -+ csg_iface->input->protm_suspend_buf = 0; -+ } -+ -+ csg_iface->input->ack_irq_mask = ~0; -+ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); -+ return 0; -+} -+ -+static void -+cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, -+ u32 csg_id, u32 cs_id) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ struct panthor_fw_cs_iface *cs_iface; -+ u32 fatal; -+ u64 info; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ fatal = cs_iface->output->fatal; -+ info = cs_iface->output->fatal_info; -+ -+ if (group) -+ group->fatal_queues |= BIT(cs_id); -+ -+ sched_queue_delayed_work(sched, tick, 0); -+ drm_warn(&ptdev->base, -+ "CSG slot %d CS slot: %d\n" -+ "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" -+ "CS_FATAL.EXCEPTION_DATA: 0x%x\n" -+ "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n", -+ csg_id, cs_id, -+ (unsigned int)CS_EXCEPTION_TYPE(fatal), -+ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), -+ (unsigned int)CS_EXCEPTION_DATA(fatal), -+ info); -+} -+ -+static void -+cs_slot_process_fault_event_locked(struct panthor_device *ptdev, -+ u32 csg_id, u32 cs_id) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ struct panthor_queue *queue = group && cs_id < group->queue_count ? 
-+ group->queues[cs_id] : NULL; -+ struct panthor_fw_cs_iface *cs_iface; -+ u32 fault; -+ u64 info; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ fault = cs_iface->output->fault; -+ info = cs_iface->output->fault_info; -+ -+ if (queue && CS_EXCEPTION_TYPE(fault) == DRM_PANTHOR_EXCEPTION_CS_INHERIT_FAULT) { -+ u64 cs_extract = queue->iface.output->extract; -+ struct panthor_job *job; -+ -+ spin_lock(&queue->fence_ctx.lock); -+ list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { -+ if (cs_extract >= job->ringbuf.end) -+ continue; -+ -+ if (cs_extract < job->ringbuf.start) -+ break; -+ -+ dma_fence_set_error(job->done_fence, -EINVAL); -+ } -+ spin_unlock(&queue->fence_ctx.lock); -+ } -+ -+ drm_warn(&ptdev->base, -+ "CSG slot %d CS slot: %d\n" -+ "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" -+ "CS_FAULT.EXCEPTION_DATA: 0x%x\n" -+ "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n", -+ csg_id, cs_id, -+ (unsigned int)CS_EXCEPTION_TYPE(fault), -+ panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), -+ (unsigned int)CS_EXCEPTION_DATA(fault), -+ info); -+} -+ -+static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ u32 renderpasses_in_flight, pending_frag_count; -+ struct panthor_heap_pool *heaps = NULL; -+ u64 heap_address, new_chunk_va = 0; -+ u32 vt_start, vt_end, frag_end; -+ int ret, csg_id; -+ -+ mutex_lock(&sched->lock); -+ csg_id = group->csg_id; -+ if (csg_id >= 0) { -+ struct panthor_fw_cs_iface *cs_iface; -+ -+ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ heaps = panthor_vm_get_heap_pool(group->vm, false); -+ heap_address = cs_iface->output->heap_address; -+ vt_start = cs_iface->output->heap_vt_start; -+ vt_end = cs_iface->output->heap_vt_end; -+ frag_end = cs_iface->output->heap_frag_end; -+ renderpasses_in_flight = vt_start - frag_end; -+ pending_frag_count = vt_end - frag_end; -+ } -+ mutex_unlock(&sched->lock); -+ -+ /* The group got scheduled out, we stop here. We will get a new tiler OOM event -+ * when it's scheduled again. -+ */ -+ if (unlikely(csg_id < 0)) -+ return 0; -+ -+ if (!heaps || frag_end > vt_end || vt_end >= vt_start) { -+ ret = -EINVAL; -+ } else { -+ /* We do the allocation without holding the scheduler lock to avoid -+ * blocking the scheduling. -+ */ -+ ret = panthor_heap_grow(heaps, heap_address, -+ renderpasses_in_flight, -+ pending_frag_count, &new_chunk_va); -+ } -+ -+ if (ret && ret != -EBUSY) { -+ group->fatal_queues |= BIT(csg_id); -+ sched_queue_delayed_work(sched, tick, 0); -+ goto out_put_heap_pool; -+ } -+ -+ mutex_lock(&sched->lock); -+ csg_id = group->csg_id; -+ if (csg_id >= 0) { -+ struct panthor_fw_csg_iface *csg_iface; -+ struct panthor_fw_cs_iface *cs_iface; -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ -+ cs_iface->input->heap_start = new_chunk_va; -+ cs_iface->input->heap_end = new_chunk_va; -+ panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); -+ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); -+ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); -+ } -+ mutex_unlock(&sched->lock); -+ -+ /* We allocated a chunck, but couldn't link it to the heap -+ * context because the group was scheduled out while we were -+ * allocating memory. We need to return this chunk to the heap. 
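Aside (not part of the patch): group_process_tiler_oom() above derives two quantities from the firmware heap statistics, render passes still in flight (vt_start - frag_end) and fragment jobs not yet completed (vt_end - frag_end), and rejects inconsistent counters before asking the heap to grow. A small standalone sketch of that bookkeeping (illustrative names, userspace stand-in):

#include <stdint.h>
#include <stdio.h>

struct tiler_heap_stats {
	uint32_t vt_start;	/* vertex/tiler passes started */
	uint32_t vt_end;	/* vertex/tiler passes completed */
	uint32_t frag_end;	/* fragment passes completed */
};

/* Returns 0 and fills the counts, or -1 if the counters are inconsistent
 * (mirrors the frag_end > vt_end || vt_end >= vt_start check above). */
static int tiler_oom_counts(const struct tiler_heap_stats *s,
			    uint32_t *renderpasses_in_flight,
			    uint32_t *pending_frag_count)
{
	if (s->frag_end > s->vt_end || s->vt_end >= s->vt_start)
		return -1;

	*renderpasses_in_flight = s->vt_start - s->frag_end;
	*pending_frag_count = s->vt_end - s->frag_end;
	return 0;
}

int main(void)
{
	struct tiler_heap_stats s = { .vt_start = 12, .vt_end = 10, .frag_end = 7 };
	uint32_t in_flight, pending;

	if (!tiler_oom_counts(&s, &in_flight, &pending))
		printf("in-flight=%u pending-frag=%u\n", in_flight, pending);
	return 0;
}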
-+ */ -+ if (unlikely(csg_id < 0 && new_chunk_va)) -+ panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); -+ -+ ret = 0; -+ -+out_put_heap_pool: -+ panthor_heap_pool_put(heaps); -+ return ret; -+} -+ -+static void group_tiler_oom_work(struct work_struct *work) -+{ -+ struct panthor_group *group = -+ container_of(work, struct panthor_group, tiler_oom_work); -+ u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); -+ -+ while (tiler_oom) { -+ u32 cs_id = ffs(tiler_oom) - 1; -+ -+ group_process_tiler_oom(group, cs_id); -+ tiler_oom &= ~BIT(cs_id); -+ } -+ -+ group_put(group); -+} -+ -+static void -+cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, -+ u32 csg_id, u32 cs_id) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ if (drm_WARN_ON(&ptdev->base, !group)) -+ return; -+ -+ atomic_or(BIT(cs_id), &group->tiler_oom); -+ -+ /* We don't use group_queue_work() here because we want to queue the -+ * work item to the heap_alloc_wq. -+ */ -+ group_get(group); -+ if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) -+ group_put(group); -+} -+ -+static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, -+ u32 csg_id, u32 cs_id) -+{ -+ struct panthor_fw_cs_iface *cs_iface; -+ u32 req, ack, events; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); -+ req = cs_iface->input->req; -+ ack = cs_iface->output->ack; -+ events = (req ^ ack) & CS_EVT_MASK; -+ -+ if (events & CS_FATAL) -+ cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); -+ -+ if (events & CS_FAULT) -+ cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); -+ -+ if (events & CS_TILER_OOM) -+ cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); -+ -+ /* We don't acknowledge the TILER_OOM event since its handling is -+ * deferred to a separate work. -+ */ -+ panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); -+ -+ return (events & (CS_FAULT | CS_TILER_OOM)) != 0; -+} -+ -+static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ struct panthor_fw_csg_iface *csg_iface; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; -+} -+ -+static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ sched->might_have_idle_groups = true; -+ -+ /* Schedule a tick so we can evict idle groups and schedule non-idle -+ * ones. This will also update runtime PM and devfreq busy/idle states, -+ * so the device can lower its frequency or get suspended. 
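Aside (not part of the patch): the IRQ paths above all use the same toggle handshake: an event is pending whenever a bit in one side's word differs from the matching bit in the other side's word, pending bits are walked with ffs(), and acknowledging means copying the peer's bit into your own word. A standalone sketch of that handshake (illustrative event names, roles generalized):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

#define EVT_FAULT	(1u << 0)
#define EVT_TILER_OOM	(1u << 1)
#define EVT_MASK	(EVT_FAULT | EVT_TILER_OOM)

struct toggle_iface {
	uint32_t theirs;	/* word owned by the other side (e.g. the FW) */
	uint32_t ours;		/* word owned by us (e.g. the host) */
};

static void raise_event(struct toggle_iface *i, uint32_t evt)
{
	i->theirs ^= evt;	/* toggling makes the bit differ from ours */
}

static void process_events(struct toggle_iface *i)
{
	uint32_t pending = (i->theirs ^ i->ours) & EVT_MASK;

	while (pending) {
		uint32_t evt = 1u << (ffs(pending) - 1);

		printf("handling event 0x%x\n", evt);
		i->ours = (i->ours & ~evt) | (i->theirs & evt);	/* acknowledge */
		pending &= ~evt;
	}
}

int main(void)
{
	struct toggle_iface iface = { 0, 0 };

	raise_event(&iface, EVT_FAULT);
	raise_event(&iface, EVT_TILER_OOM);
	process_events(&iface);		/* handles both events */
	process_events(&iface);		/* nothing pending anymore */
	return 0;
}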
-+ */ -+ sched_queue_delayed_work(sched, tick, 0); -+} -+ -+static void csg_slot_sync_update_locked(struct panthor_device *ptdev, -+ u32 csg_id) -+{ -+ struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ if (group) -+ group_queue_work(group, sync_upd); -+ -+ sched_queue_work(ptdev->scheduler, sync_upd); -+} -+ -+static void -+csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ struct panthor_group *group = csg_slot->group; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ drm_warn(&ptdev->base, "CSG slot %d progress timeout\n", csg_id); -+ -+ group = csg_slot->group; -+ if (!drm_WARN_ON(&ptdev->base, !group)) -+ group->timedout = true; -+ -+ sched_queue_delayed_work(sched, tick, 0); -+} -+ -+static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) -+{ -+ u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; -+ struct panthor_fw_csg_iface *csg_iface; -+ u32 ring_cs_db_mask = 0; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) -+ return; -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ req = READ_ONCE(csg_iface->input->req); -+ ack = READ_ONCE(csg_iface->output->ack); -+ cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); -+ cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); -+ csg_events = (req ^ ack) & CSG_EVT_MASK; -+ -+ /* There may not be any pending CSG/CS interrupts to process */ -+ if (req == ack && cs_irq_req == cs_irq_ack) -+ return; -+ -+ /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before -+ * examining the CS_ACK & CS_REQ bits. This would ensure that Host -+ * doesn't miss an interrupt for the CS in the race scenario where -+ * whilst Host is servicing an interrupt for the CS, firmware sends -+ * another interrupt for that CS. -+ */ -+ csg_iface->input->cs_irq_ack = cs_irq_req; -+ -+ panthor_fw_update_reqs(csg_iface, req, ack, -+ CSG_SYNC_UPDATE | -+ CSG_IDLE | -+ CSG_PROGRESS_TIMER_EVENT); -+ -+ if (csg_events & CSG_IDLE) -+ csg_slot_process_idle_event_locked(ptdev, csg_id); -+ -+ if (csg_events & CSG_PROGRESS_TIMER_EVENT) -+ csg_slot_process_progress_timer_event_locked(ptdev, csg_id); -+ -+ cs_irqs = cs_irq_req ^ cs_irq_ack; -+ while (cs_irqs) { -+ u32 cs_id = ffs(cs_irqs) - 1; -+ -+ if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) -+ ring_cs_db_mask |= BIT(cs_id); -+ -+ cs_irqs &= ~BIT(cs_id); -+ } -+ -+ if (csg_events & CSG_SYNC_UPDATE) -+ csg_slot_sync_update_locked(ptdev, csg_id); -+ -+ if (ring_cs_db_mask) -+ panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); -+ -+ panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); -+} -+ -+static void sched_process_idle_event_locked(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ /* Acknowledge the idle event and schedule a tick. */ -+ panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); -+ sched_queue_delayed_work(ptdev->scheduler, tick, 0); -+} -+ -+/** -+ * panthor_sched_process_global_irq() - Process the scheduling part of a global IRQ -+ * @ptdev: Device. 
-+ */ -+static void sched_process_global_irq_locked(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ u32 req, ack, evts; -+ -+ lockdep_assert_held(&ptdev->scheduler->lock); -+ -+ req = READ_ONCE(glb_iface->input->req); -+ ack = READ_ONCE(glb_iface->output->ack); -+ evts = (req ^ ack) & GLB_EVT_MASK; -+ -+ if (evts & GLB_IDLE) -+ sched_process_idle_event_locked(ptdev); -+} -+ -+static void process_fw_events_work(struct work_struct *work) -+{ -+ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, -+ fw_events_work); -+ u32 events = atomic_xchg(&sched->fw_events, 0); -+ struct panthor_device *ptdev = sched->ptdev; -+ -+ mutex_lock(&sched->lock); -+ -+ if (events & JOB_INT_GLOBAL_IF) { -+ sched_process_global_irq_locked(ptdev); -+ events &= ~JOB_INT_GLOBAL_IF; -+ } -+ -+ while (events) { -+ u32 csg_id = ffs(events) - 1; -+ -+ sched_process_csg_irq_locked(ptdev, csg_id); -+ events &= ~BIT(csg_id); -+ } -+ -+ mutex_unlock(&sched->lock); -+} -+ -+/** -+ * panthor_sched_report_fw_events() - Report FW events to the scheduler. -+ */ -+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) -+{ -+ if (!ptdev->scheduler) -+ return; -+ -+ atomic_or(events, &ptdev->scheduler->fw_events); -+ sched_queue_work(ptdev->scheduler, fw_events); -+} -+ -+static const char *fence_get_driver_name(struct dma_fence *fence) -+{ -+ return "panthor"; -+} -+ -+static const char *queue_fence_get_timeline_name(struct dma_fence *fence) -+{ -+ return "queue-fence"; -+} -+ -+static const struct dma_fence_ops panthor_queue_fence_ops = { -+ .get_driver_name = fence_get_driver_name, -+ .get_timeline_name = queue_fence_get_timeline_name, -+}; -+ -+/** -+ */ -+struct panthor_csg_slots_upd_ctx { -+ u32 update_mask; -+ u32 timedout_mask; -+ struct { -+ u32 value; -+ u32 mask; -+ } requests[MAX_CSGS]; -+}; -+ -+static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) -+{ -+ memset(ctx, 0, sizeof(*ctx)); -+} -+ -+static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, -+ struct panthor_csg_slots_upd_ctx *ctx, -+ u32 csg_id, u32 value, u32 mask) -+{ -+ if (drm_WARN_ON(&ptdev->base, !mask) || -+ drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) -+ return; -+ -+ ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); -+ ctx->requests[csg_id].mask |= mask; -+ ctx->update_mask |= BIT(csg_id); -+} -+ -+static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, -+ struct panthor_csg_slots_upd_ctx *ctx) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ u32 update_slots = ctx->update_mask; -+ -+ lockdep_assert_held(&sched->lock); -+ -+ if (!ctx->update_mask) -+ return 0; -+ -+ while (update_slots) { -+ struct panthor_fw_csg_iface *csg_iface; -+ u32 csg_id = ffs(update_slots) - 1; -+ -+ update_slots &= ~BIT(csg_id); -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ panthor_fw_update_reqs(csg_iface, req, -+ ctx->requests[csg_id].value, -+ ctx->requests[csg_id].mask); -+ } -+ -+ panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); -+ -+ update_slots = ctx->update_mask; -+ while (update_slots) { -+ struct panthor_fw_csg_iface *csg_iface; -+ u32 csg_id = ffs(update_slots) - 1; -+ u32 req_mask = ctx->requests[csg_id].mask, acked; -+ int ret; -+ -+ update_slots &= ~BIT(csg_id); -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ -+ ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); -+ -+ if (acked & 
CSG_ENDPOINT_CONFIG) -+ csg_slot_sync_priority_locked(ptdev, csg_id); -+ -+ if (acked & CSG_STATE_MASK) -+ csg_slot_sync_state_locked(ptdev, csg_id); -+ -+ if (acked & CSG_STATUS_UPDATE) { -+ csg_slot_sync_queues_state_locked(ptdev, csg_id); -+ csg_slot_sync_idle_state_locked(ptdev, csg_id); -+ } -+ -+ if (ret && acked != req_mask && -+ ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { -+ drm_err(&ptdev->base, "CSG %d update request timedout", csg_id); -+ ctx->timedout_mask |= BIT(csg_id); -+ } -+ } -+ -+ if (ctx->timedout_mask) -+ return -ETIMEDOUT; -+ -+ return 0; -+} -+ -+struct panthor_sched_tick_ctx { -+ struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; -+ struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; -+ u32 idle_group_count; -+ u32 group_count; -+ enum panthor_csg_priority min_priority; -+ struct panthor_vm *vms[MAX_CS_PER_CSG]; -+ u32 as_count; -+ bool immediate_tick; -+ u32 csg_upd_failed_mask; -+}; -+ -+static bool -+tick_ctx_is_full(const struct panthor_scheduler *sched, -+ const struct panthor_sched_tick_ctx *ctx) -+{ -+ return ctx->group_count == sched->csg_slot_count; -+} -+ -+static bool -+group_is_idle(struct panthor_group *group) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ u32 inactive_queues; -+ -+ if (group->csg_id >= 0) -+ return ptdev->scheduler->csg_slots[group->csg_id].idle; -+ -+ inactive_queues = group->idle_queues | group->blocked_queues; -+ return hweight32(inactive_queues) == group->queue_count; -+} -+ -+static bool -+group_can_run(struct panthor_group *group) -+{ -+ return group->state != PANTHOR_CS_GROUP_TERMINATED && -+ !group->destroyed && group->fatal_queues == 0 && -+ !group->timedout; -+} -+ -+static void -+tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, -+ struct panthor_sched_tick_ctx *ctx, -+ struct list_head *queue, -+ bool skip_idle_groups, -+ bool owned_by_tick_ctx) -+{ -+ struct panthor_group *group, *tmp; -+ -+ if (tick_ctx_is_full(sched, ctx)) -+ return; -+ -+ list_for_each_entry_safe(group, tmp, queue, run_node) { -+ u32 i; -+ -+ if (!group_can_run(group)) -+ continue; -+ -+ if (skip_idle_groups && group_is_idle(group)) -+ continue; -+ -+ for (i = 0; i < ctx->as_count; i++) { -+ if (ctx->vms[i] == group->vm) -+ break; -+ } -+ -+ if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) -+ continue; -+ -+ if (!owned_by_tick_ctx) -+ group_get(group); -+ -+ list_move_tail(&group->run_node, &ctx->groups[group->priority]); -+ ctx->group_count++; -+ if (group_is_idle(group)) -+ ctx->idle_group_count++; -+ -+ if (i == ctx->as_count) -+ ctx->vms[ctx->as_count++] = group->vm; -+ -+ if (ctx->min_priority > group->priority) -+ ctx->min_priority = group->priority; -+ -+ if (tick_ctx_is_full(sched, ctx)) -+ return; -+ } -+} -+ -+static void -+tick_ctx_insert_old_group(struct panthor_scheduler *sched, -+ struct panthor_sched_tick_ctx *ctx, -+ struct panthor_group *group, -+ bool full_tick) -+{ -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; -+ struct panthor_group *other_group; -+ -+ if (!full_tick) { -+ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); -+ return; -+ } -+ -+ /* Rotate to make sure groups with lower CSG slot -+ * priorities have a chance to get a higher CSG slot -+ * priority next time they get picked. This priority -+ * has an impact on resource request ordering, so it's -+ * important to make sure we don't let one group starve -+ * all other groups with the same group priority. 
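Aside (not part of the patch): the rotation described above, implemented by the list walk just below, keeps each old-group list ordered by ascending CSG-slot priority, so a group that last ran at a low slot priority is considered first and can be given a higher slot priority on the next tick. A small standalone sketch of that ordered insertion (array-based rather than list_head-based, illustrative names):

#include <stdio.h>

struct old_group {
	int id;
	int slot_priority;	/* priority the group had on its CSG slot */
};

/* Insert before the first entry with a strictly higher slot priority,
 * keeping the array sorted by ascending slot priority (stable for ties). */
static void insert_old_group(struct old_group *arr, int *count, struct old_group g)
{
	int pos = *count;

	for (int i = 0; i < *count; i++) {
		if (arr[i].slot_priority > g.slot_priority) {
			pos = i;
			break;
		}
	}
	for (int i = *count; i > pos; i--)
		arr[i] = arr[i - 1];
	arr[pos] = g;
	(*count)++;
}

int main(void)
{
	struct old_group arr[8];
	int count = 0;

	insert_old_group(arr, &count, (struct old_group){ .id = 1, .slot_priority = 7 });
	insert_old_group(arr, &count, (struct old_group){ .id = 2, .slot_priority = 3 });
	insert_old_group(arr, &count, (struct old_group){ .id = 3, .slot_priority = 7 });

	for (int i = 0; i < count; i++)
		printf("group %d (slot prio %d)\n", arr[i].id, arr[i].slot_priority);
	return 0;
}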
-+ */ -+ list_for_each_entry(other_group, -+ &ctx->old_groups[csg_slot->group->priority], -+ run_node) { -+ struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; -+ -+ if (other_csg_slot->priority > csg_slot->priority) { -+ list_add_tail(&csg_slot->group->run_node, &other_group->run_node); -+ return; -+ } -+ } -+ -+ list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); -+} -+ -+static void -+tick_ctx_init(struct panthor_scheduler *sched, -+ struct panthor_sched_tick_ctx *ctx, -+ bool full_tick) -+{ -+ struct panthor_device *ptdev = sched->ptdev; -+ struct panthor_csg_slots_upd_ctx upd_ctx; -+ int ret; -+ u32 i; -+ -+ memset(ctx, 0, sizeof(*ctx)); -+ csgs_upd_ctx_init(&upd_ctx); -+ -+ ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; -+ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { -+ INIT_LIST_HEAD(&ctx->groups[i]); -+ INIT_LIST_HEAD(&ctx->old_groups[i]); -+ } -+ -+ for (i = 0; i < sched->csg_slot_count; i++) { -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; -+ struct panthor_group *group = csg_slot->group; -+ struct panthor_fw_csg_iface *csg_iface; -+ -+ if (!group) -+ continue; -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, i); -+ group_get(group); -+ -+ /* If there was unhandled faults on the VM, force processing of -+ * CSG IRQs, so we can flag the faulty queue. -+ */ -+ if (panthor_vm_has_unhandled_faults(group->vm)) { -+ sched_process_csg_irq_locked(ptdev, i); -+ -+ /* No fatal fault reported, flag all queues as faulty. */ -+ if (!group->fatal_queues) -+ group->fatal_queues |= GENMASK(group->queue_count - 1, 0); -+ } -+ -+ tick_ctx_insert_old_group(sched, ctx, group, full_tick); -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, -+ csg_iface->output->ack ^ CSG_STATUS_UPDATE, -+ CSG_STATUS_UPDATE); -+ } -+ -+ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); -+ if (ret) { -+ panthor_device_schedule_reset(ptdev); -+ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; -+ } -+} -+ -+#define NUM_INSTRS_PER_SLOT 16 -+ -+static void -+group_term_post_processing(struct panthor_group *group) -+{ -+ struct panthor_job *job, *tmp; -+ LIST_HEAD(faulty_jobs); -+ bool cookie; -+ u32 i = 0; -+ -+ if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) -+ return; -+ -+ cookie = dma_fence_begin_signalling(); -+ for (i = 0; i < group->queue_count; i++) { -+ struct panthor_queue *queue = group->queues[i]; -+ struct panthor_syncobj_64b *syncobj; -+ int err; -+ -+ if (group->fatal_queues & BIT(i)) -+ err = -EINVAL; -+ else if (group->timedout) -+ err = -ETIMEDOUT; -+ else -+ err = -ECANCELED; -+ -+ if (!queue) -+ continue; -+ -+ spin_lock(&queue->fence_ctx.lock); -+ list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { -+ list_move_tail(&job->node, &faulty_jobs); -+ dma_fence_set_error(job->done_fence, err); -+ dma_fence_signal_locked(job->done_fence); -+ } -+ spin_unlock(&queue->fence_ctx.lock); -+ -+ /* Manually update the syncobj seqno to unblock waiters. 
*/ -+ syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj)); -+ syncobj->status = ~0; -+ syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno); -+ sched_queue_work(group->ptdev->scheduler, sync_upd); -+ } -+ dma_fence_end_signalling(cookie); -+ -+ list_for_each_entry_safe(job, tmp, &faulty_jobs, node) { -+ list_del_init(&job->node); -+ panthor_job_put(&job->base); -+ } -+} -+ -+static void group_term_work(struct work_struct *work) -+{ -+ struct panthor_group *group = -+ container_of(work, struct panthor_group, term_work); -+ -+ group_term_post_processing(group); -+ group_put(group); -+} -+ -+static void -+tick_ctx_cleanup(struct panthor_scheduler *sched, -+ struct panthor_sched_tick_ctx *ctx) -+{ -+ struct panthor_group *group, *tmp; -+ u32 i; -+ -+ for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) { -+ list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { -+ /* If everything went fine, we should only have groups -+ * to be terminated in the old_groups lists. -+ */ -+ drm_WARN_ON(&group->ptdev->base, !ctx->csg_upd_failed_mask && -+ group_can_run(group)); -+ -+ if (!group_can_run(group)) { -+ list_del_init(&group->run_node); -+ list_del_init(&group->wait_node); -+ group_queue_work(group, term); -+ } else if (group->csg_id >= 0) { -+ list_del_init(&group->run_node); -+ } else { -+ list_move(&group->run_node, -+ group_is_idle(group) ? -+ &sched->groups.idle[group->priority] : -+ &sched->groups.runnable[group->priority]); -+ } -+ group_put(group); -+ } -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { -+ /* If everything went fine, the groups to schedule lists should -+ * be empty. -+ */ -+ drm_WARN_ON(&group->ptdev->base, -+ !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); -+ -+ list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { -+ if (group->csg_id >= 0) { -+ list_del_init(&group->run_node); -+ } else { -+ list_move(&group->run_node, -+ group_is_idle(group) ? -+ &sched->groups.idle[group->priority] : -+ &sched->groups.runnable[group->priority]); -+ } -+ group_put(group); -+ } -+ } -+} -+ -+static void -+tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) -+{ -+ struct panthor_group *group, *tmp; -+ struct panthor_device *ptdev = sched->ptdev; -+ struct panthor_csg_slot *csg_slot; -+ int prio, new_csg_prio = MAX_CSG_PRIO, i; -+ u32 csg_mod_mask = 0, free_csg_slots = 0; -+ struct panthor_csg_slots_upd_ctx upd_ctx; -+ int ret; -+ -+ csgs_upd_ctx_init(&upd_ctx); -+ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ /* Suspend or terminate evicted groups. */ -+ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { -+ bool term = !group_can_run(group); -+ int csg_id = group->csg_id; -+ -+ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) -+ continue; -+ -+ csg_slot = &sched->csg_slots[csg_id]; -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, -+ term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND, -+ CSG_STATE_MASK); -+ } -+ -+ /* Update priorities on already running groups. 
*/ -+ list_for_each_entry(group, &ctx->groups[prio], run_node) { -+ struct panthor_fw_csg_iface *csg_iface; -+ int csg_id = group->csg_id; -+ -+ if (csg_id < 0) { -+ new_csg_prio--; -+ continue; -+ } -+ -+ csg_slot = &sched->csg_slots[csg_id]; -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ if (csg_slot->priority == new_csg_prio) { -+ new_csg_prio--; -+ continue; -+ } -+ -+ panthor_fw_update_reqs(csg_iface, endpoint_req, -+ CSG_EP_REQ_PRIORITY(new_csg_prio), -+ CSG_EP_REQ_PRIORITY_MASK); -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, -+ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, -+ CSG_ENDPOINT_CONFIG); -+ new_csg_prio--; -+ } -+ } -+ -+ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); -+ if (ret) { -+ panthor_device_schedule_reset(ptdev); -+ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; -+ return; -+ } -+ -+ /* Unbind evicted groups. */ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ list_for_each_entry(group, &ctx->old_groups[prio], run_node) { -+ /* This group is gone. Process interrupts to clear -+ * any pending interrupts before we start the new -+ * group. -+ */ -+ if (group->csg_id >= 0) -+ sched_process_csg_irq_locked(ptdev, group->csg_id); -+ -+ group_unbind_locked(group); -+ } -+ } -+ -+ for (i = 0; i < sched->csg_slot_count; i++) { -+ if (!sched->csg_slots[i].group) -+ free_csg_slots |= BIT(i); -+ } -+ -+ csgs_upd_ctx_init(&upd_ctx); -+ new_csg_prio = MAX_CSG_PRIO; -+ -+ /* Start new groups. */ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ list_for_each_entry(group, &ctx->groups[prio], run_node) { -+ int csg_id = group->csg_id; -+ struct panthor_fw_csg_iface *csg_iface; -+ -+ if (csg_id >= 0) { -+ new_csg_prio--; -+ continue; -+ } -+ -+ csg_id = ffs(free_csg_slots) - 1; -+ if (drm_WARN_ON(&ptdev->base, csg_id < 0)) -+ break; -+ -+ csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); -+ csg_slot = &sched->csg_slots[csg_id]; -+ csg_mod_mask |= BIT(csg_id); -+ group_bind_locked(group, csg_id); -+ csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, -+ group->state == PANTHOR_CS_GROUP_SUSPENDED ? -+ CSG_STATE_RESUME : CSG_STATE_START, -+ CSG_STATE_MASK); -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, -+ csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, -+ CSG_ENDPOINT_CONFIG); -+ free_csg_slots &= ~BIT(csg_id); -+ } -+ } -+ -+ ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); -+ if (ret) { -+ panthor_device_schedule_reset(ptdev); -+ ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; -+ return; -+ } -+ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { -+ list_del_init(&group->run_node); -+ -+ /* If the group has been destroyed while we were -+ * scheduling, ask for an immediate tick to -+ * re-evaluate as soon as possible and get rid of -+ * this dangling group. -+ */ -+ if (group->destroyed) -+ ctx->immediate_tick = true; -+ group_put(group); -+ } -+ -+ /* Return evicted groups to the idle or run queues. Groups -+ * that can no longer be run (because they've been destroyed -+ * or experienced an unrecoverable error) will be scheduled -+ * for destruction in tick_ctx_cleanup(). 
-+ */ -+ list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { -+ if (!group_can_run(group)) -+ continue; -+ -+ if (group_is_idle(group)) -+ list_move_tail(&group->run_node, &sched->groups.idle[prio]); -+ else -+ list_move_tail(&group->run_node, &sched->groups.runnable[prio]); -+ group_put(group); -+ } -+ } -+ -+ sched->used_csg_slot_count = ctx->group_count; -+ sched->might_have_idle_groups = ctx->idle_group_count > 0; -+} -+ -+static u64 -+tick_ctx_update_resched_target(struct panthor_scheduler *sched, -+ const struct panthor_sched_tick_ctx *ctx) -+{ -+ /* We had space left, no need to reschedule until some external event happens. */ -+ if (!tick_ctx_is_full(sched, ctx)) -+ goto no_tick; -+ -+ /* If idle groups were scheduled, no need to wake up until some external -+ * event happens (group unblocked, new job submitted, ...). -+ */ -+ if (ctx->idle_group_count) -+ goto no_tick; -+ -+ if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) -+ goto no_tick; -+ -+ /* If there are groups of the same priority waiting, we need to -+ * keep the scheduler ticking, otherwise, we'll just wait for -+ * new groups with higher priority to be queued. -+ */ -+ if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { -+ u64 resched_target = sched->last_tick + sched->tick_period; -+ -+ if (time_before64(sched->resched_target, sched->last_tick) || -+ time_before64(resched_target, sched->resched_target)) -+ sched->resched_target = resched_target; -+ -+ return sched->resched_target - sched->last_tick; -+ } -+ -+no_tick: -+ sched->resched_target = U64_MAX; -+ return U64_MAX; -+} -+ -+static void tick_work(struct work_struct *work) -+{ -+ struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, -+ tick_work.work); -+ struct panthor_device *ptdev = sched->ptdev; -+ struct panthor_sched_tick_ctx ctx; -+ u64 remaining_jiffies = 0, resched_delay; -+ u64 now = get_jiffies_64(); -+ int prio, ret, cookie; -+ -+ if (!drm_dev_enter(&ptdev->base, &cookie)) -+ return; -+ -+ ret = pm_runtime_resume_and_get(ptdev->base.dev); -+ if (drm_WARN_ON(&ptdev->base, ret)) -+ goto out_dev_exit; -+ -+ if (time_before64(now, sched->resched_target)) -+ remaining_jiffies = sched->resched_target - now; -+ -+ mutex_lock(&sched->lock); -+ if (panthor_device_reset_is_pending(sched->ptdev)) -+ goto out_unlock; -+ -+ tick_ctx_init(sched, &ctx, remaining_jiffies != 0); -+ if (ctx.csg_upd_failed_mask) -+ goto out_cleanup_ctx; -+ -+ if (remaining_jiffies) { -+ /* Scheduling forced in the middle of a tick. Only RT groups -+ * can preempt non-RT ones. Currently running RT groups can't be -+ * preempted. 
-+ */ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; -+ prio >= 0 && !tick_ctx_is_full(sched, &ctx); -+ prio--) { -+ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], -+ true, true); -+ if (prio == PANTHOR_CSG_PRIORITY_RT) { -+ tick_ctx_pick_groups_from_list(sched, &ctx, -+ &sched->groups.runnable[prio], -+ true, false); -+ } -+ } -+ } -+ -+ /* First pick non-idle groups */ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; -+ prio >= 0 && !tick_ctx_is_full(sched, &ctx); -+ prio--) { -+ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], -+ true, false); -+ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); -+ } -+ -+ /* If we have free CSG slots left, pick idle groups */ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; -+ prio >= 0 && !tick_ctx_is_full(sched, &ctx); -+ prio--) { -+ /* Check the old_group queue first to avoid reprogramming the slots */ -+ tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); -+ tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], -+ false, false); -+ } -+ -+ tick_ctx_apply(sched, &ctx); -+ if (ctx.csg_upd_failed_mask) -+ goto out_cleanup_ctx; -+ -+ if (ctx.idle_group_count == ctx.group_count) { -+ panthor_devfreq_record_idle(sched->ptdev); -+ if (sched->pm.has_ref) { -+ pm_runtime_put_autosuspend(ptdev->base.dev); -+ sched->pm.has_ref = false; -+ } -+ } else { -+ panthor_devfreq_record_busy(sched->ptdev); -+ if (!sched->pm.has_ref) { -+ pm_runtime_get(ptdev->base.dev); -+ sched->pm.has_ref = true; -+ } -+ } -+ -+ sched->last_tick = now; -+ resched_delay = tick_ctx_update_resched_target(sched, &ctx); -+ if (ctx.immediate_tick) -+ resched_delay = 0; -+ -+ if (resched_delay != U64_MAX) -+ sched_queue_delayed_work(sched, tick, resched_delay); -+ -+out_cleanup_ctx: -+ tick_ctx_cleanup(sched, &ctx); -+ -+out_unlock: -+ mutex_unlock(&sched->lock); -+ pm_runtime_mark_last_busy(ptdev->base.dev); -+ pm_runtime_put_autosuspend(ptdev->base.dev); -+ -+out_dev_exit: -+ drm_dev_exit(cookie); -+} -+ -+static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) -+{ -+ struct panthor_queue *queue = group->queues[queue_idx]; -+ union { -+ struct panthor_syncobj_64b sync64; -+ struct panthor_syncobj_32b sync32; -+ } *syncobj; -+ bool result; -+ u64 value; -+ -+ syncobj = panthor_queue_get_syncwait_obj(group, queue); -+ if (!syncobj) -+ return -EINVAL; -+ -+ value = queue->syncwait.sync64 ? 
-+ syncobj->sync64.seqno : -+ syncobj->sync32.seqno; -+ -+ if (queue->syncwait.gt) -+ result = value > queue->syncwait.ref; -+ else -+ result = value <= queue->syncwait.ref; -+ -+ if (result) -+ panthor_queue_put_syncwait_obj(queue); -+ -+ return result; -+} -+ -+static void sync_upd_work(struct work_struct *work) -+{ -+ struct panthor_scheduler *sched = container_of(work, -+ struct panthor_scheduler, -+ sync_upd_work); -+ struct panthor_group *group, *tmp; -+ bool immediate_tick = false; -+ -+ mutex_lock(&sched->lock); -+ list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { -+ u32 tested_queues = group->blocked_queues; -+ u32 unblocked_queues = 0; -+ -+ while (tested_queues) { -+ u32 cs_id = ffs(tested_queues) - 1; -+ int ret; -+ -+ ret = panthor_queue_eval_syncwait(group, cs_id); -+ drm_WARN_ON(&group->ptdev->base, ret < 0); -+ if (ret) -+ unblocked_queues |= BIT(cs_id); -+ -+ tested_queues &= ~BIT(cs_id); -+ } -+ -+ if (unblocked_queues) { -+ group->blocked_queues &= ~unblocked_queues; -+ -+ if (group->csg_id < 0) { -+ list_move(&group->run_node, -+ &sched->groups.runnable[group->priority]); -+ if (group->priority == PANTHOR_CSG_PRIORITY_RT) -+ immediate_tick = true; -+ } -+ } -+ -+ if (!group->blocked_queues) -+ list_del_init(&group->wait_node); -+ } -+ mutex_unlock(&sched->lock); -+ -+ if (immediate_tick) -+ sched_queue_delayed_work(sched, tick, 0); -+} -+ -+static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) -+{ -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct list_head *queue = &sched->groups.runnable[group->priority]; -+ u64 delay_jiffies = 0; -+ bool was_idle; -+ u64 now; -+ -+ if (!group_can_run(group)) -+ return; -+ -+ /* All updated queues are blocked, no need to wake up the scheduler. */ -+ if ((queue_mask & group->blocked_queues) == queue_mask) -+ return; -+ -+ was_idle = group_is_idle(group); -+ group->idle_queues &= ~queue_mask; -+ -+ /* Don't mess up with the lists if we're in a middle of a reset. */ -+ if (atomic_read(&sched->reset.in_progress)) -+ return; -+ -+ if (was_idle && !group_is_idle(group)) -+ list_move_tail(&group->run_node, queue); -+ -+ /* RT groups are preemptive. */ -+ if (group->priority == PANTHOR_CSG_PRIORITY_RT) { -+ sched_queue_delayed_work(sched, tick, 0); -+ return; -+ } -+ -+ /* Some groups might be idle, force an immediate tick to -+ * re-evaluate. -+ */ -+ if (sched->might_have_idle_groups) { -+ sched_queue_delayed_work(sched, tick, 0); -+ return; -+ } -+ -+ /* Scheduler is ticking, nothing to do. */ -+ if (sched->resched_target != U64_MAX) { -+ /* If there are free slots, force immediating ticking. */ -+ if (sched->used_csg_slot_count < sched->csg_slot_count) -+ sched_queue_delayed_work(sched, tick, 0); -+ -+ return; -+ } -+ -+ /* Scheduler tick was off, recalculate the resched_target based on the -+ * last tick event, and queue the scheduler work. -+ */ -+ now = get_jiffies_64(); -+ sched->resched_target = sched->last_tick + sched->tick_period; -+ if (sched->used_csg_slot_count == sched->csg_slot_count && -+ time_before64(now, sched->resched_target)) -+ delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); -+ -+ sched_queue_delayed_work(sched, tick, delay_jiffies); -+} -+ -+static void queue_stop(struct panthor_queue *queue, -+ struct panthor_job *bad_job) -+{ -+ drm_sched_stop(&queue->scheduler, bad_job ? 
&bad_job->base : NULL); -+} -+ -+static void queue_start(struct panthor_queue *queue) -+{ -+ struct panthor_job *job; -+ -+ /* Re-assign the parent fences. */ -+ list_for_each_entry(job, &queue->scheduler.pending_list, base.list) -+ job->base.s_fence->parent = dma_fence_get(job->done_fence); -+ -+ drm_sched_start(&queue->scheduler, true); -+} -+ -+static void panthor_group_stop(struct panthor_group *group) -+{ -+ struct panthor_scheduler *sched = group->ptdev->scheduler; -+ -+ lockdep_assert_held(&sched->reset.lock); -+ -+ for (u32 i = 0; i < group->queue_count; i++) -+ queue_stop(group->queues[i], NULL); -+ -+ group_get(group); -+ list_move_tail(&group->run_node, &sched->reset.stopped_groups); -+} -+ -+static void panthor_group_start(struct panthor_group *group) -+{ -+ struct panthor_scheduler *sched = group->ptdev->scheduler; -+ -+ lockdep_assert_held(&group->ptdev->scheduler->reset.lock); -+ -+ for (u32 i = 0; i < group->queue_count; i++) -+ queue_start(group->queues[i]); -+ -+ if (group_can_run(group)) { -+ list_move_tail(&group->run_node, -+ group_is_idle(group) ? -+ &sched->groups.idle[group->priority] : -+ &sched->groups.runnable[group->priority]); -+ } else { -+ list_del_init(&group->run_node); -+ list_del_init(&group->wait_node); -+ group_queue_work(group, term); -+ } -+ -+ group_put(group); -+} -+ -+static void panthor_sched_immediate_tick(struct panthor_device *ptdev) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ -+ sched_queue_delayed_work(sched, tick, 0); -+} -+ -+/** -+ * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. -+ */ -+void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) -+{ -+ /* Force a tick to immediately kill faulty groups. */ -+ panthor_sched_immediate_tick(ptdev); -+} -+ -+void panthor_sched_resume(struct panthor_device *ptdev) -+{ -+ /* Force a tick to re-evaluate after a resume. */ -+ panthor_sched_immediate_tick(ptdev); -+} -+ -+void panthor_sched_suspend(struct panthor_device *ptdev) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_csg_slots_upd_ctx upd_ctx; -+ u64 suspended_slots, faulty_slots; -+ struct panthor_group *group; -+ u32 i; -+ -+ mutex_lock(&sched->lock); -+ csgs_upd_ctx_init(&upd_ctx); -+ for (i = 0; i < sched->csg_slot_count; i++) { -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; -+ -+ if (csg_slot->group) { -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, -+ CSG_STATE_SUSPEND, -+ CSG_STATE_MASK); -+ } -+ } -+ -+ suspended_slots = upd_ctx.update_mask; -+ -+ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); -+ suspended_slots &= ~upd_ctx.timedout_mask; -+ faulty_slots = upd_ctx.timedout_mask; -+ -+ if (faulty_slots) { -+ u32 slot_mask = faulty_slots; -+ -+ drm_err(&ptdev->base, "CSG suspend failed, escalating to termination"); -+ csgs_upd_ctx_init(&upd_ctx); -+ while (slot_mask) { -+ u32 csg_id = ffs(slot_mask) - 1; -+ -+ csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, -+ CSG_STATE_TERMINATE, -+ CSG_STATE_MASK); -+ slot_mask &= ~BIT(csg_id); -+ } -+ -+ csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); -+ -+ slot_mask = upd_ctx.timedout_mask; -+ while (slot_mask) { -+ u32 csg_id = ffs(slot_mask) - 1; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ -+ /* Terminate command timedout, but the soft-reset will -+ * automatically terminate all active groups, so let's -+ * force the state to halted here. 
-+ */ -+ if (csg_slot->group->state != PANTHOR_CS_GROUP_TERMINATED) -+ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; -+ slot_mask &= ~BIT(csg_id); -+ } -+ } -+ -+ /* Flush L2 and LSC caches to make sure suspend state is up-to-date. -+ * If the flush fails, flag all queues for termination. -+ */ -+ if (suspended_slots) { -+ bool flush_caches_failed = false; -+ u32 slot_mask = suspended_slots; -+ -+ if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) -+ flush_caches_failed = true; -+ -+ while (slot_mask) { -+ u32 csg_id = ffs(slot_mask) - 1; -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; -+ -+ if (flush_caches_failed) -+ csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; -+ else -+ csg_slot_sync_update_locked(ptdev, csg_id); -+ -+ slot_mask &= ~BIT(csg_id); -+ } -+ -+ if (flush_caches_failed) -+ faulty_slots |= suspended_slots; -+ } -+ -+ for (i = 0; i < sched->csg_slot_count; i++) { -+ struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; -+ -+ group = csg_slot->group; -+ if (!group) -+ continue; -+ -+ group_get(group); -+ -+ if (group->csg_id >= 0) -+ sched_process_csg_irq_locked(ptdev, group->csg_id); -+ -+ group_unbind_locked(group); -+ -+ drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); -+ -+ if (group_can_run(group)) { -+ list_add(&group->run_node, -+ &sched->groups.idle[group->priority]); -+ } else { -+ /* We don't bother stopping the scheduler if the group is -+ * faulty, the group termination work will finish the job. -+ */ -+ list_del_init(&group->wait_node); -+ group_queue_work(group, term); -+ } -+ group_put(group); -+ } -+ mutex_unlock(&sched->lock); -+} -+ -+void panthor_sched_pre_reset(struct panthor_device *ptdev) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_group *group, *group_tmp; -+ u32 i; -+ -+ mutex_lock(&sched->reset.lock); -+ atomic_set(&sched->reset.in_progress, true); -+ -+ /* Cancel all scheduler works. Once this is done, these works can't be -+ * scheduled again until the reset operation is complete. -+ */ -+ cancel_work_sync(&sched->sync_upd_work); -+ cancel_delayed_work_sync(&sched->tick_work); -+ -+ panthor_sched_suspend(ptdev); -+ -+ /* Stop all groups that might still accept jobs, so we don't get passed -+ * new jobs while we're resetting. -+ */ -+ for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { -+ /* All groups should be in the idle lists. */ -+ drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); -+ list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) -+ panthor_group_stop(group); -+ } -+ -+ for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { -+ list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) -+ panthor_group_stop(group); -+ } -+ -+ mutex_unlock(&sched->reset.lock); -+} -+ -+void panthor_sched_post_reset(struct panthor_device *ptdev) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_group *group, *group_tmp; -+ -+ mutex_lock(&sched->reset.lock); -+ -+ list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) -+ panthor_group_start(group); -+ -+ /* We're done resetting the GPU, clear the reset.in_progress bit so we can -+ * kick the scheduler. 
-+ */ -+ atomic_set(&sched->reset.in_progress, false); -+ mutex_unlock(&sched->reset.lock); -+ -+ sched_queue_delayed_work(sched, tick, 0); -+ -+ sched_queue_work(sched, sync_upd); -+} -+ -+static void group_sync_upd_work(struct work_struct *work) -+{ -+ struct panthor_group *group = -+ container_of(work, struct panthor_group, sync_upd_work); -+ struct panthor_job *job, *job_tmp; -+ LIST_HEAD(done_jobs); -+ u32 queue_idx; -+ bool cookie; -+ -+ cookie = dma_fence_begin_signalling(); -+ for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { -+ struct panthor_queue *queue = group->queues[queue_idx]; -+ struct panthor_syncobj_64b *syncobj; -+ -+ if (!queue) -+ continue; -+ -+ syncobj = group->syncobjs->kmap + (queue_idx * sizeof(*syncobj)); -+ -+ spin_lock(&queue->fence_ctx.lock); -+ list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { -+ if (!job->call_info.size) -+ continue; -+ -+ if (syncobj->seqno < job->done_fence->seqno) -+ break; -+ -+ list_move_tail(&job->node, &done_jobs); -+ dma_fence_signal_locked(job->done_fence); -+ } -+ spin_unlock(&queue->fence_ctx.lock); -+ } -+ dma_fence_end_signalling(cookie); -+ -+ list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { -+ list_del_init(&job->node); -+ panthor_job_put(&job->base); -+ } -+ -+ group_put(group); -+} -+ -+static struct dma_fence * -+queue_run_job(struct drm_sched_job *sched_job) -+{ -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ struct panthor_group *group = job->group; -+ struct panthor_queue *queue = group->queues[job->queue_idx]; -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ u32 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf); -+ u32 ringbuf_insert = queue->iface.input->insert & (ringbuf_size - 1); -+ u64 addr_reg = ptdev->csif_info.cs_reg_count - -+ ptdev->csif_info.unpreserved_cs_reg_count; -+ u64 val_reg = addr_reg + 2; -+ u64 sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) + -+ job->queue_idx * sizeof(struct panthor_syncobj_64b); -+ u32 waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); -+ struct dma_fence *done_fence; -+ int ret; -+ -+ u64 call_instrs[NUM_INSTRS_PER_SLOT] = { -+ /* MOV32 rX+2, cs.latest_flush */ -+ (2ull << 56) | (val_reg << 48) | job->call_info.latest_flush, -+ -+ /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ -+ (36ull << 56) | (0ull << 48) | (val_reg << 40) | (0 << 16) | 0x233, -+ -+ /* MOV48 rX:rX+1, cs.start */ -+ (1ull << 56) | (addr_reg << 48) | job->call_info.start, -+ -+ /* MOV32 rX+2, cs.size */ -+ (2ull << 56) | (val_reg << 48) | job->call_info.size, -+ -+ /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ -+ (3ull << 56) | (1 << 16), -+ -+ /* CALL rX:rX+1, rX+2 */ -+ (32ull << 56) | (addr_reg << 40) | (val_reg << 32), -+ -+ /* MOV48 rX:rX+1, sync_addr */ -+ (1ull << 56) | (addr_reg << 48) | sync_addr, -+ -+ /* MOV48 rX+2, #1 */ -+ (1ull << 56) | (val_reg << 48) | 1, -+ -+ /* WAIT(all) */ -+ (3ull << 56) | (waitall_mask << 16), -+ -+ /* SYNC_ADD64.system_scope.propage_err.nowait rX:rX+1, rX+2*/ -+ (51ull << 56) | (0ull << 48) | (addr_reg << 40) | (val_reg << 32) | (0 << 16) | 1, -+ -+ /* ERROR_BARRIER, so we can recover from faults at job -+ * boundaries. -+ */ -+ (47ull << 56), -+ }; -+ -+ /* Need to be cacheline aligned to please the prefetcher. 
*/ -+ static_assert(sizeof(call_instrs) % 64 == 0, -+ "call_instrs is not aligned on a cacheline"); -+ -+ /* Stream size is zero, nothing to do => return a NULL fence and let -+ * drm_sched signal the parent. -+ */ -+ if (!job->call_info.size) -+ return NULL; -+ -+ ret = pm_runtime_resume_and_get(ptdev->base.dev); -+ if (drm_WARN_ON(&ptdev->base, ret)) -+ return ERR_PTR(ret); -+ -+ mutex_lock(&sched->lock); -+ if (!group_can_run(group)) { -+ done_fence = ERR_PTR(-ECANCELED); -+ goto out_unlock; -+ } -+ -+ dma_fence_init(job->done_fence, -+ &panthor_queue_fence_ops, -+ &queue->fence_ctx.lock, -+ queue->fence_ctx.id, -+ atomic64_inc_return(&queue->fence_ctx.seqno)); -+ -+ memcpy(queue->ringbuf->kmap + ringbuf_insert, -+ call_instrs, sizeof(call_instrs)); -+ -+ panthor_job_get(&job->base); -+ spin_lock(&queue->fence_ctx.lock); -+ list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs); -+ spin_unlock(&queue->fence_ctx.lock); -+ -+ job->ringbuf.start = queue->iface.input->insert; -+ job->ringbuf.end = job->ringbuf.start + sizeof(call_instrs); -+ -+ /* Make sure the ring buffer is updated before the INSERT -+ * register. -+ */ -+ wmb(); -+ -+ queue->iface.input->extract = queue->iface.output->extract; -+ queue->iface.input->insert = job->ringbuf.end; -+ -+ if (group->csg_id < 0) { -+ /* If the queue is blocked, we want to keep the timeout running, so we -+ * can detect unbounded waits and kill the group when that happens. -+ * Otherwise, we suspend the timeout so the time we spend waiting for -+ * a CSG slot is not counted. -+ */ -+ if (!(group->blocked_queues & BIT(job->queue_idx)) && -+ !queue->timeout_suspended) { -+ queue->remaining_time = drm_sched_suspend_timeout(&queue->scheduler); -+ queue->timeout_suspended = true; -+ } -+ -+ group_schedule_locked(group, BIT(job->queue_idx)); -+ } else { -+ gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1); -+ if (!sched->pm.has_ref && -+ !(group->blocked_queues & BIT(job->queue_idx))) { -+ pm_runtime_get(ptdev->base.dev); -+ sched->pm.has_ref = true; -+ } -+ } -+ -+ done_fence = dma_fence_get(job->done_fence); -+ -+out_unlock: -+ mutex_unlock(&sched->lock); -+ pm_runtime_mark_last_busy(ptdev->base.dev); -+ pm_runtime_put_autosuspend(ptdev->base.dev); -+ -+ return done_fence; -+} -+ -+static enum drm_gpu_sched_stat -+queue_timedout_job(struct drm_sched_job *sched_job) -+{ -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ struct panthor_group *group = job->group; -+ struct panthor_device *ptdev = group->ptdev; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_queue *queue = group->queues[job->queue_idx]; -+ -+ drm_warn(&ptdev->base, "job timeout\n"); -+ -+ drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); -+ -+ queue_stop(queue, job); -+ -+ mutex_lock(&sched->lock); -+ group->timedout = true; -+ if (group->csg_id >= 0) { -+ sched_queue_delayed_work(ptdev->scheduler, tick, 0); -+ } else { -+ /* Remove from the run queues, so the scheduler can't -+ * pick the group on the next tick. 
-+ */ -+ drm_WARN_ON(&ptdev->base, list_empty(&group->run_node)); -+ list_del_init(&group->run_node); -+ list_del_init(&group->wait_node); -+ -+ group_queue_work(group, term); -+ } -+ mutex_unlock(&sched->lock); -+ -+ queue_start(queue); -+ -+ return DRM_GPU_SCHED_STAT_NOMINAL; -+} -+ -+static void queue_free_job(struct drm_sched_job *sched_job) -+{ -+ drm_sched_job_cleanup(sched_job); -+ panthor_job_put(sched_job); -+} -+ -+static const struct drm_sched_backend_ops panthor_queue_sched_ops = { -+ .run_job = queue_run_job, -+ .timedout_job = queue_timedout_job, -+ .free_job = queue_free_job, -+}; -+ -+static struct panthor_queue * -+group_create_queue(struct panthor_group *group, -+ const struct drm_panthor_queue_create *args) -+{ -+ struct drm_gpu_scheduler *drm_sched; -+ struct panthor_queue *queue; -+ int ret; -+ -+ if (args->pad[0] || args->pad[1] || args->pad[2]) -+ return ERR_PTR(-EINVAL); -+ -+ if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || -+ !is_power_of_2(args->ringbuf_size)) -+ return ERR_PTR(-EINVAL); -+ -+ if (args->priority > CSF_MAX_QUEUE_PRIO) -+ return ERR_PTR(-EINVAL); -+ -+ queue = kzalloc(sizeof(*queue), GFP_KERNEL); -+ if (!queue) -+ return ERR_PTR(-ENOMEM); -+ -+ queue->fence_ctx.id = dma_fence_context_alloc(1); -+ spin_lock_init(&queue->fence_ctx.lock); -+ INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs); -+ -+ queue->priority = args->priority; -+ -+ queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm, -+ args->ringbuf_size, -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+ if (IS_ERR(queue->ringbuf)) { -+ ret = PTR_ERR(queue->ringbuf); -+ goto err_free_queue; -+ } -+ -+ ret = panthor_kernel_bo_vmap(queue->ringbuf); -+ if (ret) -+ goto err_free_queue; -+ -+ queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev, -+ &queue->iface.input, -+ &queue->iface.output, -+ &queue->iface.input_fw_va, -+ &queue->iface.output_fw_va); -+ if (IS_ERR(queue->iface.mem)) { -+ ret = PTR_ERR(queue->iface.mem); -+ goto err_free_queue; -+ } -+ -+ ret = drm_sched_init(&queue->scheduler, &panthor_queue_sched_ops, -+ group->ptdev->scheduler->wq, 1, -+ args->ringbuf_size / (NUM_INSTRS_PER_SLOT * sizeof(u64)), -+ 0, msecs_to_jiffies(JOB_TIMEOUT_MS), -+ group->ptdev->reset.wq, -+ NULL, "panthor-queue", group->ptdev->base.dev); -+ if (ret) -+ goto err_free_queue; -+ -+ drm_sched = &queue->scheduler; -+ ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL); -+ -+ return queue; -+ -+err_free_queue: -+ group_free_queue(group, queue); -+ return ERR_PTR(ret); -+} -+ -+#define MAX_GROUPS_PER_POOL 128 -+ -+int panthor_group_create(struct panthor_file *pfile, -+ const struct drm_panthor_group_create *group_args, -+ const struct drm_panthor_queue_create *queue_args) -+{ -+ struct panthor_device *ptdev = pfile->ptdev; -+ struct panthor_group_pool *gpool = pfile->groups; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); -+ struct panthor_group *group = NULL; -+ u32 gid, i, suspend_size; -+ int ret; -+ -+ if (group_args->pad) -+ return -EINVAL; -+ -+ if (group_args->priority > PANTHOR_CSG_PRIORITY_HIGH) -+ return -EINVAL; -+ -+ if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || -+ (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || -+ (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) -+ return -EINVAL; -+ -+ if 
(hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || -+ hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || -+ hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores) -+ return -EINVAL; -+ -+ group = kzalloc(sizeof(*group), GFP_KERNEL); -+ if (!group) -+ return -ENOMEM; -+ -+ spin_lock_init(&group->fatal_lock); -+ kref_init(&group->refcount); -+ group->state = PANTHOR_CS_GROUP_CREATED; -+ group->csg_id = -1; -+ -+ group->ptdev = ptdev; -+ group->max_compute_cores = group_args->max_compute_cores; -+ group->compute_core_mask = group_args->compute_core_mask; -+ group->max_fragment_cores = group_args->max_fragment_cores; -+ group->fragment_core_mask = group_args->fragment_core_mask; -+ group->max_tiler_cores = group_args->max_tiler_cores; -+ group->tiler_core_mask = group_args->tiler_core_mask; -+ group->priority = group_args->priority; -+ -+ INIT_LIST_HEAD(&group->wait_node); -+ INIT_LIST_HEAD(&group->run_node); -+ INIT_WORK(&group->term_work, group_term_work); -+ INIT_WORK(&group->sync_upd_work, group_sync_upd_work); -+ INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); -+ INIT_WORK(&group->release_work, group_release_work); -+ -+ group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id); -+ if (!group->vm) { -+ ret = -EINVAL; -+ goto err_put_group; -+ } -+ -+ suspend_size = csg_iface->control->suspend_size; -+ group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); -+ if (IS_ERR(group->suspend_buf)) { -+ ret = PTR_ERR(group->suspend_buf); -+ group->suspend_buf = NULL; -+ goto err_put_group; -+ } -+ -+ suspend_size = csg_iface->control->protm_suspend_size; -+ group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size); -+ if (IS_ERR(group->protm_suspend_buf)) { -+ ret = PTR_ERR(group->protm_suspend_buf); -+ group->protm_suspend_buf = NULL; -+ goto err_put_group; -+ } -+ -+ group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm, -+ group_args->queues.count * -+ sizeof(struct panthor_syncobj_64b), -+ DRM_PANTHOR_BO_NO_MMAP, -+ DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | -+ DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED, -+ PANTHOR_VM_KERNEL_AUTO_VA); -+ if (IS_ERR(group->syncobjs)) { -+ ret = PTR_ERR(group->syncobjs); -+ goto err_put_group; -+ } -+ -+ ret = panthor_kernel_bo_vmap(group->syncobjs); -+ if (ret) -+ goto err_put_group; -+ -+ memset(group->syncobjs->kmap, 0, -+ group_args->queues.count * sizeof(struct panthor_syncobj_64b)); -+ -+ for (i = 0; i < group_args->queues.count; i++) { -+ group->queues[i] = group_create_queue(group, &queue_args[i]); -+ if (IS_ERR(group->queues[i])) { -+ ret = PTR_ERR(group->queues[i]); -+ group->queues[i] = NULL; -+ goto err_put_group; -+ } -+ -+ group->queue_count++; -+ } -+ -+ group->idle_queues = GENMASK(group->queue_count - 1, 0); -+ -+ ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL); -+ if (ret) -+ goto err_put_group; -+ -+ mutex_lock(&sched->reset.lock); -+ if (atomic_read(&sched->reset.in_progress)) { -+ panthor_group_stop(group); -+ } else { -+ mutex_lock(&sched->lock); -+ list_add_tail(&group->run_node, -+ &sched->groups.idle[group->priority]); -+ mutex_unlock(&sched->lock); -+ } -+ mutex_unlock(&sched->reset.lock); -+ -+ return gid; -+ -+err_put_group: -+ group_put(group); -+ return ret; -+} -+ -+int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) -+{ -+ struct panthor_group_pool *gpool = pfile->groups; -+ struct panthor_device *ptdev = pfile->ptdev; -+ struct panthor_scheduler *sched = 
ptdev->scheduler; -+ struct panthor_group *group; -+ -+ group = xa_erase(&gpool->xa, group_handle); -+ if (!group) -+ return -EINVAL; -+ -+ for (u32 i = 0; i < group->queue_count; i++) { -+ if (group->queues[i]) -+ drm_sched_entity_destroy(&group->queues[i]->entity); -+ } -+ -+ mutex_lock(&sched->reset.lock); -+ mutex_lock(&sched->lock); -+ group->destroyed = true; -+ if (group->csg_id >= 0) { -+ sched_queue_delayed_work(sched, tick, 0); -+ } else if (!atomic_read(&sched->reset.in_progress)) { -+ /* Remove from the run queues, so the scheduler can't -+ * pick the group on the next tick. -+ */ -+ list_del_init(&group->run_node); -+ list_del_init(&group->wait_node); -+ group_queue_work(group, term); -+ } -+ mutex_unlock(&sched->lock); -+ mutex_unlock(&sched->reset.lock); -+ -+ group_put(group); -+ return 0; -+} -+ -+int panthor_group_get_state(struct panthor_file *pfile, -+ struct drm_panthor_group_get_state *get_state) -+{ -+ struct panthor_group_pool *gpool = pfile->groups; -+ struct panthor_device *ptdev = pfile->ptdev; -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ struct panthor_group *group; -+ -+ if (get_state->pad) -+ return -EINVAL; -+ -+ group = group_get(xa_load(&gpool->xa, get_state->group_handle)); -+ if (!group) -+ return -EINVAL; -+ -+ memset(get_state, 0, sizeof(*get_state)); -+ -+ mutex_lock(&sched->lock); -+ if (group->timedout) -+ get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; -+ if (group->fatal_queues) { -+ get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; -+ get_state->fatal_queues = group->fatal_queues; -+ } -+ mutex_unlock(&sched->lock); -+ -+ group_put(group); -+ return 0; -+} -+ -+int panthor_group_pool_create(struct panthor_file *pfile) -+{ -+ struct panthor_group_pool *gpool; -+ -+ gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); -+ if (!gpool) -+ return -ENOMEM; -+ -+ xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1); -+ pfile->groups = gpool; -+ return 0; -+} -+ -+void panthor_group_pool_destroy(struct panthor_file *pfile) -+{ -+ struct panthor_group_pool *gpool = pfile->groups; -+ struct panthor_group *group; -+ unsigned long i; -+ -+ if (IS_ERR_OR_NULL(gpool)) -+ return; -+ -+ xa_for_each(&gpool->xa, i, group) -+ panthor_group_destroy(pfile, i); -+ -+ xa_destroy(&gpool->xa); -+ kfree(gpool); -+ pfile->groups = NULL; -+} -+ -+static void job_release(struct kref *ref) -+{ -+ struct panthor_job *job = container_of(ref, struct panthor_job, refcount); -+ -+ drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); -+ -+ if (job->base.s_fence) -+ drm_sched_job_cleanup(&job->base); -+ -+ if (job->done_fence && job->done_fence->ops) -+ dma_fence_put(job->done_fence); -+ else -+ dma_fence_free(job->done_fence); -+ -+ group_put(job->group); -+ -+ kfree(job); -+} -+ -+struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) -+{ -+ if (sched_job) { -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ -+ kref_get(&job->refcount); -+ } -+ -+ return sched_job; -+} -+ -+void panthor_job_put(struct drm_sched_job *sched_job) -+{ -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ -+ if (sched_job) -+ kref_put(&job->refcount, job_release); -+} -+ -+struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) -+{ -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ -+ return job->group->vm; -+} -+ -+struct drm_sched_job * -+panthor_job_create(struct panthor_file *pfile, -+ u16 group_handle, -+ const struct drm_panthor_queue_submit *qsubmit) -+{ -+ 
struct panthor_group_pool *gpool = pfile->groups; -+ struct panthor_job *job; -+ int ret; -+ -+ if (qsubmit->pad) -+ return ERR_PTR(-EINVAL); -+ -+ /* If stream_addr is zero, so stream_size should be. */ -+ if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) -+ return ERR_PTR(-EINVAL); -+ -+ /* Make sure the address is aligned on 64-byte (cacheline) and the size is -+ * aligned on 8-byte (instruction size). -+ */ -+ if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7)) -+ return ERR_PTR(-EINVAL); -+ -+ /* bits 24:30 must be zero. */ -+ if (qsubmit->latest_flush & GENMASK(30, 24)) -+ return ERR_PTR(-EINVAL); -+ -+ job = kzalloc(sizeof(*job), GFP_KERNEL); -+ if (!job) -+ return ERR_PTR(-ENOMEM); -+ -+ kref_init(&job->refcount); -+ job->queue_idx = qsubmit->queue_index; -+ job->call_info.size = qsubmit->stream_size; -+ job->call_info.start = qsubmit->stream_addr; -+ job->call_info.latest_flush = qsubmit->latest_flush; -+ INIT_LIST_HEAD(&job->node); -+ -+ job->group = group_get(xa_load(&gpool->xa, group_handle)); -+ if (!job->group) { -+ ret = -EINVAL; -+ goto err_put_job; -+ } -+ -+ if (job->queue_idx >= job->group->queue_count || -+ !job->group->queues[job->queue_idx]) { -+ ret = -EINVAL; -+ goto err_put_job; -+ } -+ -+ job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); -+ if (!job->done_fence) { -+ ret = -ENOMEM; -+ goto err_put_job; -+ } -+ -+ ret = drm_sched_job_init(&job->base, -+ &job->group->queues[job->queue_idx]->entity, -+ 1, job->group); -+ if (ret) -+ goto err_put_job; -+ -+ return &job->base; -+ -+err_put_job: -+ panthor_job_put(&job->base); -+ return ERR_PTR(ret); -+} -+ -+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job) -+{ -+ struct panthor_job *job = container_of(sched_job, struct panthor_job, base); -+ -+ /* Still not sure why we want USAGE_WRITE for external objects, since I -+ * was assuming this would be handled through explicit syncs being imported -+ * to external BOs with DMA_BUF_IOCTL_IMPORT_SYNC_FILE, but other drivers -+ * seem to pass DMA_RESV_USAGE_WRITE, so there must be a good reason. 
-+ */ -+ panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished, -+ DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); -+} -+ -+void panthor_sched_unplug(struct panthor_device *ptdev) -+{ -+ struct panthor_scheduler *sched = ptdev->scheduler; -+ -+ cancel_delayed_work_sync(&sched->tick_work); -+ -+ mutex_lock(&sched->lock); -+ if (sched->pm.has_ref) { -+ pm_runtime_put(ptdev->base.dev); -+ sched->pm.has_ref = false; -+ } -+ mutex_unlock(&sched->lock); -+} -+ -+static void panthor_sched_fini(struct drm_device *ddev, void *res) -+{ -+ struct panthor_scheduler *sched = res; -+ int prio; -+ -+ if (!sched || !sched->csg_slot_count) -+ return; -+ -+ cancel_delayed_work_sync(&sched->tick_work); -+ -+ if (sched->wq) -+ destroy_workqueue(sched->wq); -+ -+ if (sched->heap_alloc_wq) -+ destroy_workqueue(sched->heap_alloc_wq); -+ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio])); -+ drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio])); -+ } -+ -+ drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting)); -+} -+ -+int panthor_sched_init(struct panthor_device *ptdev) -+{ -+ struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); -+ struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0); -+ struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0); -+ struct panthor_scheduler *sched; -+ u32 gpu_as_count, num_groups; -+ int prio, ret; -+ -+ sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL); -+ if (!sched) -+ return -ENOMEM; -+ -+ /* The highest bit in JOB_INT_* is reserved for globabl IRQs. That -+ * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here. -+ */ -+ num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num); -+ -+ /* The FW-side scheduler might deadlock if two groups with the same -+ * priority try to access a set of resources that overlaps, with part -+ * of the resources being allocated to one group and the other part to -+ * the other group, both groups waiting for the remaining resources to -+ * be allocated. To avoid that, it is recommended to assign each CSG a -+ * different priority. In theory we could allow several groups to have -+ * the same CSG priority if they don't request the same resources, but -+ * that makes the scheduling logic more complicated, so let's clamp -+ * the number of CSG slots to MAX_CSG_PRIO + 1 for now. -+ */ -+ num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups); -+ -+ /* We need at least one AS for the MCU and one for the GPU contexts. 
*/ -+ gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1)); -+ if (!gpu_as_count) { -+ drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)", -+ gpu_as_count + 1); -+ return -EINVAL; -+ } -+ -+ sched->ptdev = ptdev; -+ sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features); -+ sched->csg_slot_count = num_groups; -+ sched->cs_slot_count = csg_iface->control->stream_num; -+ sched->as_slot_count = gpu_as_count; -+ ptdev->csif_info.csg_slot_count = sched->csg_slot_count; -+ ptdev->csif_info.cs_slot_count = sched->cs_slot_count; -+ ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count; -+ -+ sched->last_tick = 0; -+ sched->resched_target = U64_MAX; -+ sched->tick_period = msecs_to_jiffies(10); -+ INIT_DELAYED_WORK(&sched->tick_work, tick_work); -+ INIT_WORK(&sched->sync_upd_work, sync_upd_work); -+ INIT_WORK(&sched->fw_events_work, process_fw_events_work); -+ -+ ret = drmm_mutex_init(&ptdev->base, &sched->lock); -+ if (ret) -+ return ret; -+ -+ for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { -+ INIT_LIST_HEAD(&sched->groups.runnable[prio]); -+ INIT_LIST_HEAD(&sched->groups.idle[prio]); -+ } -+ INIT_LIST_HEAD(&sched->groups.waiting); -+ -+ ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock); -+ if (ret) -+ return ret; -+ -+ INIT_LIST_HEAD(&sched->reset.stopped_groups); -+ -+ /* sched->heap_alloc_wq will be used for heap chunk allocation on -+ * tiler OOM events, which means we can't use the same workqueue for -+ * the scheduler because works queued by the scheduler are in -+ * the dma-signalling path. Allocate a dedicated heap_alloc_wq to -+ * work around this limitation. -+ * -+ * FIXME: Ultimately, what we need is a failable/non-blocking GEM -+ * allocation path that we can call when a heap OOM is reported. The -+ * FW is smart enough to fall back on other methods if the kernel can't -+ * allocate memory, and fail the tiling job if none of these -+ * countermeasures worked. -+ * -+ * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the -+ * system is running out of memory. -+ */ -+ sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0); -+ sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); -+ if (!sched->wq || !sched->heap_alloc_wq) { -+ panthor_sched_fini(&ptdev->base, sched); -+ drm_err(&ptdev->base, "Failed to allocate the workqueues"); -+ return -ENOMEM; -+ } -+ -+ ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched); -+ if (ret) -+ return ret; -+ -+ ptdev->scheduler = sched; -+ return 0; -+} -diff --git a/drivers/gpu/drm/panthor/panthor_sched.h b/drivers/gpu/drm/panthor/panthor_sched.h -new file mode 100644 -index 000000000000..bd6912942bc1 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_sched.h -@@ -0,0 +1,48 @@ -+/* SPDX-License-Identifier: GPL-2.0 or MIT */ -+/* Copyright 2023 Collabora ltd. 
*/ -+ -+#ifndef __PANTHOR_SCHED_H__ -+#define __PANTHOR_SCHED_H__ -+ -+#include -+ -+struct drm_exec; -+struct dma_fence; -+struct drm_file; -+struct drm_gem_object; -+struct drm_sched_job; -+struct panthor_device; -+struct panthor_file; -+struct panthor_group_pool; -+struct panthor_job; -+ -+int panthor_group_create(struct panthor_file *pfile, -+ const struct drm_panthor_group_create *group_args, -+ const struct drm_panthor_queue_create *queue_args); -+int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle); -+int panthor_group_get_state(struct panthor_file *pfile, -+ struct drm_panthor_group_get_state *get_state); -+ -+struct drm_sched_job * -+panthor_job_create(struct panthor_file *pfile, -+ u16 group_handle, -+ const struct drm_panthor_queue_submit *qsubmit); -+struct drm_sched_job *panthor_job_get(struct drm_sched_job *job); -+struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job); -+void panthor_job_put(struct drm_sched_job *job); -+void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *job); -+ -+int panthor_group_pool_create(struct panthor_file *pfile); -+void panthor_group_pool_destroy(struct panthor_file *pfile); -+ -+int panthor_sched_init(struct panthor_device *ptdev); -+void panthor_sched_unplug(struct panthor_device *ptdev); -+void panthor_sched_pre_reset(struct panthor_device *ptdev); -+void panthor_sched_post_reset(struct panthor_device *ptdev); -+void panthor_sched_suspend(struct panthor_device *ptdev); -+void panthor_sched_resume(struct panthor_device *ptdev); -+ -+void panthor_sched_report_mmu_fault(struct panthor_device *ptdev); -+void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events); -+ -+#endif --- -2.42.0 - - -From 9921e74c5e16d4f7ed17a614ff6680d573d3c80d Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:42 +0100 -Subject: [PATCH 61/81] drm/panthor: Add the driver frontend block - -This is the last piece missing to expose the driver to the outside -world. - -This is basically a wrapper between the ioctls and the other logical -blocks. - -v4: -- Add an ioctl to let the UMD query the VM state -- Fix kernel doc -- Let panthor_device_init() call panthor_device_init() -- Fix cleanup ordering in the panthor_init() error path -- Add Steve's and Liviu's R-b - -v3: -- Add acks for the MIT/GPL2 relicensing -- Fix 32-bit support -- Account for panthor_vm and panthor_sched changes -- Simplify the resv preparation/update logic -- Use a linked list rather than xarray for list of signals. -- Simplify panthor_get_uobj_array by returning the newly allocated - array. -- Drop the "DOC" for job submission helpers and move the relevant - comments to panthor_ioctl_group_submit(). -- Add helpers sync_op_is_signal()/sync_op_is_wait(). -- Simplify return type of panthor_submit_ctx_add_sync_signal() and - panthor_submit_ctx_get_sync_signal(). -- Drop WARN_ON from panthor_submit_ctx_add_job(). -- Fix typos in comments. 
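[Editorial aside - illustration only, not part of the patch content above or below.]
The "simplified panthor_get_uobj_array" mentioned in the v3 changelog is built around
copy_struct_from_user(), which is how the driver accepts uAPI object arrays whose
per-element stride may be smaller (older userspace) or larger (newer userspace) than the
kernel's view of the struct. A minimal sketch of that pattern, using hypothetical names
(example_elem, example_copy_array) and assuming standard kernel headers; the real
implementation is panthor_get_uobj_array()/PANTHOR_UOBJ_GET_ARRAY() further down in this
hunk, which additionally enforces a minimum stride covering the mandatory fields:

#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* Kernel-side view of one array element (hypothetical). */
struct example_elem {
	__u32 flags;
	__u32 handle;
};

/*
 * Copy `count` elements from userspace, honouring the user-provided
 * per-element stride. copy_struct_from_user() copies min(ksize, usize)
 * bytes, zero-fills missing trailing fields (older userspace) and rejects
 * non-zero unknown trailing bytes (newer userspace).
 */
static struct example_elem *
example_copy_array(u64 usr_array, u32 count, u32 usr_stride)
{
	void __user *in = u64_to_user_ptr(usr_array);
	struct example_elem *out;
	u32 i;

	out = kvmalloc_array(count, sizeof(*out), GFP_KERNEL);
	if (!out)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < count; i++) {
		int ret = copy_struct_from_user(&out[i], sizeof(out[i]),
						in + (size_t)i * usr_stride,
						usr_stride);
		if (ret) {
			kvfree(out);
			return ERR_PTR(ret);
		}
	}

	return out;
}

With this scheme, older userspace gets its missing trailing fields zero-filled, and newer
userspace is accepted only if the kernel-unknown trailing bytes are zero, matching the
extensibility rules documented in the panthor uAPI header.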
- -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Reviewed-by: Steven Price -Reviewed-by: Liviu Dudau -Link: https://lore.kernel.org/r/20240122163047.1954733-12-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/panthor/panthor_drv.c | 1471 +++++++++++++++++++++++++ - 1 file changed, 1471 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/panthor_drv.c - -diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c -new file mode 100644 -index 000000000000..ff98bb346578 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/panthor_drv.c -@@ -0,0 +1,1471 @@ -+// SPDX-License-Identifier: GPL-2.0 or MIT -+/* Copyright 2018 Marty E. Plummer */ -+/* Copyright 2019 Linaro, Ltd., Rob Herring */ -+/* Copyright 2019 Collabora ltd. */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "panthor_sched.h" -+#include "panthor_device.h" -+#include "panthor_gem.h" -+#include "panthor_heap.h" -+#include "panthor_fw.h" -+#include "panthor_mmu.h" -+#include "panthor_gpu.h" -+#include "panthor_regs.h" -+ -+/** -+ * DOC: user <-> kernel object copy helpers. -+ */ -+ -+/** -+ * panthor_set_uobj() - Copy kernel object to user object. -+ * @usr_ptr: Users pointer. -+ * @usr_size: Size of the user object. -+ * @min_size: Minimum size for this object. -+ * @kern_size: Size of the kernel object. -+ * @in: Address of the kernel object to copy. -+ * -+ * Helper automating kernel -> user object copies. -+ * -+ * Don't use this function directly, use PANTHOR_UOBJ_SET() instead. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_set_uobj(u64 usr_ptr, u32 usr_size, u32 min_size, u32 kern_size, const void *in) -+{ -+ /* User size shouldn't be smaller than the minimal object size. */ -+ if (usr_size < min_size) -+ return -EINVAL; -+ -+ if (copy_to_user(u64_to_user_ptr(usr_ptr), in, min_t(u32, usr_size, kern_size))) -+ return -EFAULT; -+ -+ /* When the kernel object is smaller than the user object, we fill the gap with -+ * zeros. -+ */ -+ if (usr_size > kern_size && -+ clear_user(u64_to_user_ptr(usr_ptr + kern_size), usr_size - kern_size)) { -+ return -EFAULT; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_get_uobj_array() - Copy a user object array into a kernel accessible object array. -+ * @in: The object array to copy. -+ * @min_stride: Minimum array stride. -+ * @obj_size: Kernel object size. -+ * -+ * Helper automating user -> kernel object copies. -+ * -+ * Don't use this function directly, use PANTHOR_UOBJ_GET_ARRAY() instead. -+ * -+ * Return: newly allocated object array or an ERR_PTR on error. -+ */ -+static void * -+panthor_get_uobj_array(const struct drm_panthor_obj_array *in, u32 min_stride, -+ u32 obj_size) -+{ -+ int ret = 0; -+ void *out_alloc; -+ -+ /* User stride must be at least the minimum object size, otherwise it might -+ * lack useful information. -+ */ -+ if (in->stride < min_stride) -+ return ERR_PTR(-EINVAL); -+ -+ if (!in->count) -+ return NULL; -+ -+ out_alloc = kvmalloc_array(in->count, obj_size, GFP_KERNEL); -+ if (!out_alloc) -+ return ERR_PTR(-ENOMEM); -+ -+ if (obj_size == in->stride) { -+ /* Fast path when user/kernel have the same uAPI header version. 
*/ -+ if (copy_from_user(out_alloc, u64_to_user_ptr(in->array), -+ (unsigned long)obj_size * in->count)) -+ ret = -EFAULT; -+ } else { -+ void __user *in_ptr = u64_to_user_ptr(in->array); -+ void *out_ptr = out_alloc; -+ -+ /* If the sizes differ, we need to copy elements one by one. */ -+ for (u32 i = 0; i < in->count; i++) { -+ ret = copy_struct_from_user(out_ptr, obj_size, in_ptr, in->stride); -+ if (ret) -+ break; -+ -+ out_ptr += obj_size; -+ in_ptr += in->stride; -+ } -+ } -+ -+ if (ret) { -+ kvfree(out_alloc); -+ return ERR_PTR(ret); -+ } -+ -+ return out_alloc; -+} -+ -+/** -+ * PANTHOR_UOBJ_MIN_SIZE_INTERNAL() - Get the minimum user object size -+ * @_typename: Object type. -+ * @_last_mandatory_field: Last mandatory field. -+ * -+ * Get the minimum user object size based on the last mandatory field name, -+ * A.K.A, the name of the last field of the structure at the time this -+ * structure was added to the uAPI. -+ * -+ * Don't use directly, use PANTHOR_UOBJ_DECL() instead. -+ */ -+#define PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) \ -+ (offsetof(_typename, _last_mandatory_field) + \ -+ sizeof(((_typename *)NULL)->_last_mandatory_field)) -+ -+/** -+ * PANTHOR_UOBJ_DECL() - Declare a new uAPI object whose subject to -+ * evolutions. -+ * @_typename: Object type. -+ * @_last_mandatory_field: Last mandatory field. -+ * -+ * Should be used to extend the PANTHOR_UOBJ_MIN_SIZE() list. -+ */ -+#define PANTHOR_UOBJ_DECL(_typename, _last_mandatory_field) \ -+ _typename : PANTHOR_UOBJ_MIN_SIZE_INTERNAL(_typename, _last_mandatory_field) -+ -+/** -+ * PANTHOR_UOBJ_MIN_SIZE() - Get the minimum size of a given uAPI object -+ * @_obj_name: Object to get the minimum size of. -+ * -+ * Don't use this macro directly, it's automatically called by -+ * PANTHOR_UOBJ_{SET,GET_ARRAY}(). -+ */ -+#define PANTHOR_UOBJ_MIN_SIZE(_obj_name) \ -+ _Generic(_obj_name, \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_gpu_info, tiler_present), \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_csif_info, pad), \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_sync_op, timeline_value), \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, ringbuf_size), \ -+ PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs)) -+ -+/** -+ * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object. -+ * @_dest_usr_ptr: User pointer to copy to. -+ * @_usr_size: Size of the user object. -+ * @_src_obj: Kernel object to copy (not a pointer). -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+#define PANTHOR_UOBJ_SET(_dest_usr_ptr, _usr_size, _src_obj) \ -+ panthor_set_uobj(_dest_usr_ptr, _usr_size, \ -+ PANTHOR_UOBJ_MIN_SIZE(_src_obj), \ -+ sizeof(_src_obj), &(_src_obj)) -+ -+/** -+ * PANTHOR_UOBJ_GET_ARRAY() - Copy a user object array to a kernel accessible -+ * object array. -+ * @_dest_array: Local variable that will hold the newly allocated kernel -+ * object array. -+ * @_uobj_array: The drm_panthor_obj_array object describing the user object -+ * array. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+#define PANTHOR_UOBJ_GET_ARRAY(_dest_array, _uobj_array) \ -+ ({ \ -+ typeof(_dest_array) _tmp; \ -+ _tmp = panthor_get_uobj_array(_uobj_array, \ -+ PANTHOR_UOBJ_MIN_SIZE((_dest_array)[0]), \ -+ sizeof((_dest_array)[0])); \ -+ if (!IS_ERR(_tmp)) \ -+ _dest_array = _tmp; \ -+ PTR_ERR_OR_ZERO(_tmp); \ -+ }) -+ -+/** -+ * struct panthor_sync_signal - Represent a synchronization object point to attach -+ * our job fence to. -+ * -+ * This structure is here to keep track of fences that are currently bound to -+ * a specific syncobj point. -+ * -+ * At the beginning of a job submission, the fence -+ * is retrieved from the syncobj itself, and can be NULL if no fence was attached -+ * to this point. -+ * -+ * At the end, it points to the fence of the last job that had a -+ * %DRM_PANTHOR_SYNC_OP_SIGNAL on this syncobj. -+ * -+ * With jobs being submitted in batches, the fence might change several times during -+ * the process, allowing one job to wait on a job that's part of the same submission -+ * but appears earlier in the drm_panthor_group_submit::queue_submits array. -+ */ -+struct panthor_sync_signal { -+ /** @node: list_head to track signal ops within a submit operation */ -+ struct list_head node; -+ -+ /** @handle: The syncobj handle. */ -+ u32 handle; -+ -+ /** -+ * @point: The syncobj point. -+ * -+ * Zero for regular syncobjs, and non-zero for timeline syncobjs. -+ */ -+ u64 point; -+ -+ /** -+ * @syncobj: The sync object pointed by @handle. -+ */ -+ struct drm_syncobj *syncobj; -+ -+ /** -+ * @chain: Chain object used to link the new fence to an existing -+ * timeline syncobj. -+ * -+ * NULL for regular syncobj, non-NULL for timeline syncobjs. -+ */ -+ struct dma_fence_chain *chain; -+ -+ /** -+ * @fence: The fence to assign to the syncobj or syncobj-point. -+ */ -+ struct dma_fence *fence; -+}; -+ -+/** -+ * struct panthor_job_ctx - Job context -+ */ -+struct panthor_job_ctx { -+ /** @job: The job that is about to be submitted to drm_sched. */ -+ struct drm_sched_job *job; -+ -+ /** @syncops: Array of sync operations. */ -+ struct drm_panthor_sync_op *syncops; -+ -+ /** @syncop_count: Number of sync operations. */ -+ u32 syncop_count; -+}; -+ -+/** -+ * struct panthor_submit_ctx - Submission context -+ * -+ * Anything that's related to a submission (%DRM_IOCTL_PANTHOR_VM_BIND or -+ * %DRM_IOCTL_PANTHOR_GROUP_SUBMIT) is kept here, so we can automate the -+ * initialization and cleanup steps. -+ */ -+struct panthor_submit_ctx { -+ /** @file: DRM file this submission happens on. */ -+ struct drm_file *file; -+ -+ /** -+ * @signals: List of struct panthor_sync_signal. -+ * -+ * %DRM_PANTHOR_SYNC_OP_SIGNAL operations will be recorded here, -+ * and %DRM_PANTHOR_SYNC_OP_WAIT will first check if an entry -+ * matching the syncobj+point exists before calling -+ * drm_syncobj_find_fence(). This allows us to describe dependencies -+ * existing between jobs that are part of the same batch. -+ */ -+ struct list_head signals; -+ -+ /** @jobs: Array of jobs. */ -+ struct panthor_job_ctx *jobs; -+ -+ /** @job_count: Number of entries in the @jobs array. */ -+ u32 job_count; -+ -+ /** @exec: drm_exec context used to acquire and prepare resv objects. 
*/ -+ struct drm_exec exec; -+}; -+ -+#define PANTHOR_SYNC_OP_FLAGS_MASK \ -+ (DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK | DRM_PANTHOR_SYNC_OP_SIGNAL) -+ -+static bool sync_op_is_signal(const struct drm_panthor_sync_op *sync_op) -+{ -+ return !!(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); -+} -+ -+static bool sync_op_is_wait(const struct drm_panthor_sync_op *sync_op) -+{ -+ /* Note that DRM_PANTHOR_SYNC_OP_WAIT == 0 */ -+ return !(sync_op->flags & DRM_PANTHOR_SYNC_OP_SIGNAL); -+} -+ -+/** -+ * panthor_check_sync_op() - Check drm_panthor_sync_op fields -+ * @sync_op: The sync operation to check. -+ * -+ * Return: 0 on success, -EINVAL otherwise. -+ */ -+static int -+panthor_check_sync_op(const struct drm_panthor_sync_op *sync_op) -+{ -+ u8 handle_type; -+ -+ if (sync_op->flags & ~PANTHOR_SYNC_OP_FLAGS_MASK) -+ return -EINVAL; -+ -+ handle_type = sync_op->flags & DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_MASK; -+ if (handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && -+ handle_type != DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_TIMELINE_SYNCOBJ) -+ return -EINVAL; -+ -+ if (handle_type == DRM_PANTHOR_SYNC_OP_HANDLE_TYPE_SYNCOBJ && -+ sync_op->timeline_value != 0) -+ return -EINVAL; -+ -+ return 0; -+} -+ -+/** -+ * panthor_sync_signal_free() - Release resources and free a panthor_sync_signal object -+ * @sig_sync: Signal object to free. -+ */ -+static void -+panthor_sync_signal_free(struct panthor_sync_signal *sig_sync) -+{ -+ if (!sig_sync) -+ return; -+ -+ drm_syncobj_put(sig_sync->syncobj); -+ dma_fence_chain_free(sig_sync->chain); -+ dma_fence_put(sig_sync->fence); -+ kfree(sig_sync); -+} -+ -+/** -+ * panthor_submit_ctx_add_sync_signal() - Add a signal operation to a submit context -+ * @ctx: Context to add the signal operation to. -+ * @handle: Syncobj handle. -+ * @point: Syncobj point. -+ * -+ * Return: 0 on success, otherwise negative error value. -+ */ -+static int -+panthor_submit_ctx_add_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) -+{ -+ struct panthor_sync_signal *sig_sync; -+ struct dma_fence *cur_fence; -+ int ret; -+ -+ sig_sync = kzalloc(sizeof(*sig_sync), GFP_KERNEL); -+ if (!sig_sync) -+ return -ENOMEM; -+ -+ sig_sync->handle = handle; -+ sig_sync->point = point; -+ -+ if (point > 0) { -+ sig_sync->chain = dma_fence_chain_alloc(); -+ if (!sig_sync->chain) { -+ ret = -ENOMEM; -+ goto err_free_sig_sync; -+ } -+ } -+ -+ sig_sync->syncobj = drm_syncobj_find(ctx->file, handle); -+ if (!sig_sync->syncobj) { -+ ret = -EINVAL; -+ goto err_free_sig_sync; -+ } -+ -+ /* Retrieve the current fence attached to that point. It's -+ * perfectly fine to get a NULL fence here, it just means there's -+ * no fence attached to that point yet. -+ */ -+ if (!drm_syncobj_find_fence(ctx->file, handle, point, 0, &cur_fence)) -+ sig_sync->fence = cur_fence; -+ -+ list_add_tail(&sig_sync->node, &ctx->signals); -+ -+ return 0; -+ -+err_free_sig_sync: -+ panthor_sync_signal_free(sig_sync); -+ return ret; -+} -+ -+/** -+ * panthor_submit_ctx_search_sync_signal() - Search an existing signal operation in a -+ * submit context. -+ * @ctx: Context to search the signal operation in. -+ * @handle: Syncobj handle. -+ * @point: Syncobj point. -+ * -+ * Return: A valid panthor_sync_signal object if found, NULL otherwise. 
-+ */ -+static struct panthor_sync_signal * -+panthor_submit_ctx_search_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) -+{ -+ struct panthor_sync_signal *sig_sync; -+ -+ list_for_each_entry(sig_sync, &ctx->signals, node) { -+ if (handle == sig_sync->handle && point == sig_sync->point) -+ return sig_sync; -+ } -+ -+ return NULL; -+} -+ -+/** -+ * panthor_submit_ctx_add_job() - Add a job to a submit context -+ * @ctx: Context to search the signal operation in. -+ * @idx: Index of the job in the context. -+ * @job: Job to add. -+ * @syncs: Sync operations provided by userspace. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_add_job(struct panthor_submit_ctx *ctx, u32 idx, -+ struct drm_sched_job *job, -+ const struct drm_panthor_obj_array *syncs) -+{ -+ int ret; -+ -+ ctx->jobs[idx].job = job; -+ -+ ret = PANTHOR_UOBJ_GET_ARRAY(ctx->jobs[idx].syncops, syncs); -+ if (ret) -+ return ret; -+ -+ ctx->jobs[idx].syncop_count = syncs->count; -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_get_sync_signal() - Search signal operation and add one if none was found. -+ * @ctx: Context to search the signal operation in. -+ * @handle: Syncobj handle. -+ * @point: Syncobj point. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_get_sync_signal(struct panthor_submit_ctx *ctx, u32 handle, u64 point) -+{ -+ struct panthor_sync_signal *sig_sync; -+ -+ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, handle, point); -+ if (sig_sync) -+ return 0; -+ -+ return panthor_submit_ctx_add_sync_signal(ctx, handle, point); -+} -+ -+/** -+ * panthor_submit_ctx_update_job_sync_signal_fences() - Update fences -+ * on the signal operations specified by a job. -+ * @ctx: Context to search the signal operation in. -+ * @job_idx: Index of the job to operate on. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_update_job_sync_signal_fences(struct panthor_submit_ctx *ctx, -+ u32 job_idx) -+{ -+ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, -+ struct panthor_device, -+ base); -+ struct dma_fence *done_fence = &ctx->jobs[job_idx].job->s_fence->finished; -+ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; -+ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; -+ -+ for (u32 i = 0; i < sync_op_count; i++) { -+ struct dma_fence *old_fence; -+ struct panthor_sync_signal *sig_sync; -+ -+ if (!sync_op_is_signal(&sync_ops[i])) -+ continue; -+ -+ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, -+ sync_ops[i].timeline_value); -+ if (drm_WARN_ON(&ptdev->base, !sig_sync)) -+ return -EINVAL; -+ -+ old_fence = sig_sync->fence; -+ sig_sync->fence = dma_fence_get(done_fence); -+ dma_fence_put(old_fence); -+ -+ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) -+ return -EINVAL; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_collect_job_signal_ops() - Iterate over all job signal operations -+ * and add them to the context. -+ * @ctx: Context to search the signal operation in. -+ * @job_idx: Index of the job to operate on. -+ * -+ * Return: 0 on success, a negative error code otherwise. 
-+ */ -+static int -+panthor_submit_ctx_collect_job_signal_ops(struct panthor_submit_ctx *ctx, -+ u32 job_idx) -+{ -+ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; -+ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; -+ -+ for (u32 i = 0; i < sync_op_count; i++) { -+ int ret; -+ -+ if (!sync_op_is_signal(&sync_ops[i])) -+ continue; -+ -+ ret = panthor_check_sync_op(&sync_ops[i]); -+ if (ret) -+ return ret; -+ -+ ret = panthor_submit_ctx_get_sync_signal(ctx, -+ sync_ops[i].handle, -+ sync_ops[i].timeline_value); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_push_fences() - Iterate over the signal array, and for each entry, push -+ * the currently assigned fence to the associated syncobj. -+ * @ctx: Context to push fences on. -+ * -+ * This is the last step of a submission procedure, and is done once we know the submission -+ * is effective and job fences are guaranteed to be signaled in finite time. -+ */ -+static void -+panthor_submit_ctx_push_fences(struct panthor_submit_ctx *ctx) -+{ -+ struct panthor_sync_signal *sig_sync; -+ -+ list_for_each_entry(sig_sync, &ctx->signals, node) { -+ if (sig_sync->chain) { -+ drm_syncobj_add_point(sig_sync->syncobj, sig_sync->chain, -+ sig_sync->fence, sig_sync->point); -+ sig_sync->chain = NULL; -+ } else { -+ drm_syncobj_replace_fence(sig_sync->syncobj, sig_sync->fence); -+ } -+ } -+} -+ -+/** -+ * panthor_submit_ctx_add_sync_deps_to_job() - Add sync wait operations as -+ * job dependencies. -+ * @ctx: Submit context. -+ * @job_idx: Index of the job to operate on. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_add_sync_deps_to_job(struct panthor_submit_ctx *ctx, -+ u32 job_idx) -+{ -+ struct panthor_device *ptdev = container_of(ctx->file->minor->dev, -+ struct panthor_device, -+ base); -+ const struct drm_panthor_sync_op *sync_ops = ctx->jobs[job_idx].syncops; -+ struct drm_sched_job *job = ctx->jobs[job_idx].job; -+ u32 sync_op_count = ctx->jobs[job_idx].syncop_count; -+ int ret = 0; -+ -+ for (u32 i = 0; i < sync_op_count; i++) { -+ struct panthor_sync_signal *sig_sync; -+ struct dma_fence *fence; -+ -+ if (!sync_op_is_wait(&sync_ops[i])) -+ continue; -+ -+ ret = panthor_check_sync_op(&sync_ops[i]); -+ if (ret) -+ return ret; -+ -+ sig_sync = panthor_submit_ctx_search_sync_signal(ctx, sync_ops[i].handle, -+ sync_ops[i].timeline_value); -+ if (sig_sync) { -+ if (drm_WARN_ON(&ptdev->base, !sig_sync->fence)) -+ return -EINVAL; -+ -+ fence = dma_fence_get(sig_sync->fence); -+ } else { -+ ret = drm_syncobj_find_fence(ctx->file, sync_ops[i].handle, -+ sync_ops[i].timeline_value, -+ 0, &fence); -+ if (ret) -+ return ret; -+ } -+ -+ ret = drm_sched_job_add_dependency(job, fence); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_collect_jobs_signal_ops() - Collect all signal operations -+ * and add them to the submit context. -+ * @ctx: Submit context. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_collect_jobs_signal_ops(struct panthor_submit_ctx *ctx) -+{ -+ for (u32 i = 0; i < ctx->job_count; i++) { -+ int ret; -+ -+ ret = panthor_submit_ctx_collect_job_signal_ops(ctx, i); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_add_deps_and_arm_jobs() - Add jobs dependencies and arm jobs -+ * @ctx: Submit context. -+ * -+ * Must be called after the resv preparation has been taken care of. 
-+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int -+panthor_submit_ctx_add_deps_and_arm_jobs(struct panthor_submit_ctx *ctx) -+{ -+ for (u32 i = 0; i < ctx->job_count; i++) { -+ int ret; -+ -+ ret = panthor_submit_ctx_add_sync_deps_to_job(ctx, i); -+ if (ret) -+ return ret; -+ -+ drm_sched_job_arm(ctx->jobs[i].job); -+ -+ ret = panthor_submit_ctx_update_job_sync_signal_fences(ctx, i); -+ if (ret) -+ return ret; -+ } -+ -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_push_jobs() - Push jobs to their scheduling entities. -+ * @ctx: Submit context. -+ * @upd_resvs: Callback used to update reservation objects that were previously -+ * preapred. -+ */ -+static void -+panthor_submit_ctx_push_jobs(struct panthor_submit_ctx *ctx, -+ void (*upd_resvs)(struct drm_exec *, struct drm_sched_job *)) -+{ -+ for (u32 i = 0; i < ctx->job_count; i++) { -+ upd_resvs(&ctx->exec, ctx->jobs[i].job); -+ drm_sched_entity_push_job(ctx->jobs[i].job); -+ -+ /* Job is owned by the scheduler now. */ -+ ctx->jobs[i].job = NULL; -+ } -+ -+ panthor_submit_ctx_push_fences(ctx); -+} -+ -+/** -+ * panthor_submit_ctx_init() - Initializes a submission context -+ * @ctx: Submit context to initialize. -+ * @file: drm_file this submission happens on. -+ * @job_count: Number of jobs that will be submitted. -+ * -+ * Return: 0 on success, a negative error code otherwise. -+ */ -+static int panthor_submit_ctx_init(struct panthor_submit_ctx *ctx, -+ struct drm_file *file, u32 job_count) -+{ -+ ctx->jobs = kvmalloc_array(job_count, sizeof(*ctx->jobs), -+ GFP_KERNEL | __GFP_ZERO); -+ if (!ctx->jobs) -+ return -ENOMEM; -+ -+ ctx->file = file; -+ ctx->job_count = job_count; -+ INIT_LIST_HEAD(&ctx->signals); -+ drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); -+ return 0; -+} -+ -+/** -+ * panthor_submit_ctx_cleanup() - Cleanup a submission context -+ * @ctx: Submit context to cleanup. -+ * @job_put: Job put callback. 
-+ */ -+static void panthor_submit_ctx_cleanup(struct panthor_submit_ctx *ctx, -+ void (*job_put)(struct drm_sched_job *)) -+{ -+ struct panthor_sync_signal *sig_sync, *tmp; -+ unsigned long i; -+ -+ drm_exec_fini(&ctx->exec); -+ -+ list_for_each_entry_safe(sig_sync, tmp, &ctx->signals, node) -+ panthor_sync_signal_free(sig_sync); -+ -+ for (i = 0; i < ctx->job_count; i++) { -+ job_put(ctx->jobs[i].job); -+ kvfree(ctx->jobs[i].syncops); -+ } -+ -+ kvfree(ctx->jobs); -+} -+ -+static int panthor_ioctl_dev_query(struct drm_device *ddev, void *data, struct drm_file *file) -+{ -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ struct drm_panthor_dev_query *args = data; -+ -+ if (!args->pointer) { -+ switch (args->type) { -+ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: -+ args->size = sizeof(ptdev->gpu_info); -+ return 0; -+ -+ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: -+ args->size = sizeof(ptdev->csif_info); -+ return 0; -+ -+ default: -+ return -EINVAL; -+ } -+ } -+ -+ switch (args->type) { -+ case DRM_PANTHOR_DEV_QUERY_GPU_INFO: -+ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->gpu_info); -+ -+ case DRM_PANTHOR_DEV_QUERY_CSIF_INFO: -+ return PANTHOR_UOBJ_SET(args->pointer, args->size, ptdev->csif_info); -+ -+ default: -+ return -EINVAL; -+ } -+} -+ -+#define PANTHOR_VM_CREATE_FLAGS 0 -+ -+static int panthor_ioctl_vm_create(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_vm_create *args = data; -+ int cookie, ret; -+ -+ if (!drm_dev_enter(ddev, &cookie)) -+ return -ENODEV; -+ -+ ret = panthor_vm_pool_create_vm(ptdev, pfile->vms, args); -+ if (ret >= 0) { -+ args->id = ret; -+ ret = 0; -+ } -+ -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_vm_destroy *args = data; -+ -+ if (args->pad) -+ return -EINVAL; -+ -+ return panthor_vm_pool_destroy_vm(pfile->vms, args->id); -+} -+ -+#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP -+ -+static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_bo_create *args = data; -+ struct panthor_vm *vm = NULL; -+ int cookie, ret; -+ -+ if (!drm_dev_enter(ddev, &cookie)) -+ return -ENODEV; -+ -+ if (!args->size || args->pad || -+ (args->flags & ~PANTHOR_BO_FLAGS)) { -+ ret = -EINVAL; -+ goto out_dev_exit; -+ } -+ -+ if (args->exclusive_vm_id) { -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id); -+ if (!vm) { -+ ret = -EINVAL; -+ goto out_dev_exit; -+ } -+ } -+ -+ ret = panthor_gem_create_with_handle(file, ddev, vm, args->size, -+ args->flags, &args->handle); -+ -+ panthor_vm_put(vm); -+ -+out_dev_exit: -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static int panthor_ioctl_bo_mmap_offset(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct drm_panthor_bo_mmap_offset *args = data; -+ struct drm_gem_object *obj; -+ int ret; -+ -+ if (args->pad) -+ return -EINVAL; -+ -+ obj = drm_gem_object_lookup(file, args->handle); -+ if (!obj) -+ return -ENOENT; -+ -+ ret = drm_gem_create_mmap_offset(obj); -+ if (ret) -+ goto out; -+ -+ args->offset = drm_vma_node_offset_addr(&obj->vma_node); -+ -+out: -+ drm_gem_object_put(obj); -+ return ret; 
-+} -+ -+static int panthor_ioctl_group_submit(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_group_submit *args = data; -+ struct drm_panthor_queue_submit *jobs_args; -+ struct panthor_submit_ctx ctx; -+ int ret = 0, cookie; -+ -+ if (args->pad) -+ return -EINVAL; -+ -+ if (!drm_dev_enter(ddev, &cookie)) -+ return -ENODEV; -+ -+ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->queue_submits); -+ if (ret) -+ goto out_dev_exit; -+ -+ ret = panthor_submit_ctx_init(&ctx, file, args->queue_submits.count); -+ if (ret) -+ goto out_free_jobs_args; -+ -+ /* Create jobs and attach sync operations */ -+ for (u32 i = 0; i < args->queue_submits.count; i++) { -+ const struct drm_panthor_queue_submit *qsubmit = &jobs_args[i]; -+ struct drm_sched_job *job; -+ -+ job = panthor_job_create(pfile, args->group_handle, qsubmit); -+ if (IS_ERR(job)) { -+ ret = PTR_ERR(job); -+ goto out_cleanup_submit_ctx; -+ } -+ -+ ret = panthor_submit_ctx_add_job(&ctx, i, job, &qsubmit->syncs); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ } -+ -+ /* -+ * Collect signal operations on all jobs, such that each job can pick -+ * from it for its dependencies and update the fence to signal when the -+ * job is submitted. -+ */ -+ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ -+ /* -+ * We acquire/prepare revs on all jobs before proceeding with the -+ * dependency registration. -+ * -+ * This is solving two problems: -+ * 1. drm_sched_job_arm() and drm_sched_entity_push_job() must be -+ * protected by a lock to make sure no concurrent access to the same -+ * entity get interleaved, which would mess up with the fence seqno -+ * ordering. Luckily, one of the resv being acquired is the VM resv, -+ * and a scheduling entity is only bound to a single VM. As soon as -+ * we acquire the VM resv, we should be safe. -+ * 2. Jobs might depend on fences that were issued by previous jobs in -+ * the same batch, so we can't add dependencies on all jobs before -+ * arming previous jobs and registering the fence to the signal -+ * array, otherwise we might miss dependencies, or point to an -+ * outdated fence. -+ */ -+ if (args->queue_submits.count > 0) { -+ /* All jobs target the same group, so they also point to the same VM. */ -+ struct panthor_vm *vm = panthor_job_vm(ctx.jobs[0].job); -+ -+ drm_exec_until_all_locked(&ctx.exec) { -+ ret = panthor_vm_prepare_mapped_bos_resvs(&ctx.exec, vm, -+ args->queue_submits.count); -+ } -+ -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ } -+ -+ /* -+ * Now that resvs are locked/prepared, we can iterate over each job to -+ * add the dependencies, arm the job fence, register the job fence to -+ * the signal array. -+ */ -+ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ -+ /* Nothing can fail after that point, so we can make our job fences -+ * visible to the outside world. Push jobs and set the job fences to -+ * the resv slots we reserved. This also pushes the fences to the -+ * syncobjs that are part of the signal array. 
-+ */ -+ panthor_submit_ctx_push_jobs(&ctx, panthor_job_update_resvs); -+ -+out_cleanup_submit_ctx: -+ panthor_submit_ctx_cleanup(&ctx, panthor_job_put); -+ -+out_free_jobs_args: -+ kvfree(jobs_args); -+ -+out_dev_exit: -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static int panthor_ioctl_group_destroy(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_group_destroy *args = data; -+ -+ if (args->pad) -+ return -EINVAL; -+ -+ return panthor_group_destroy(pfile, args->group_handle); -+} -+ -+static int panthor_ioctl_group_create(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_group_create *args = data; -+ struct drm_panthor_queue_create *queue_args; -+ int ret; -+ -+ if (!args->queues.count) -+ return -EINVAL; -+ -+ ret = PANTHOR_UOBJ_GET_ARRAY(queue_args, &args->queues); -+ if (ret) -+ return ret; -+ -+ ret = panthor_group_create(pfile, args, queue_args); -+ if (ret >= 0) { -+ args->group_handle = ret; -+ ret = 0; -+ } -+ -+ kvfree(queue_args); -+ return ret; -+} -+ -+static int panthor_ioctl_group_get_state(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_group_get_state *args = data; -+ -+ return panthor_group_get_state(pfile, args); -+} -+ -+static int panthor_ioctl_tiler_heap_create(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_tiler_heap_create *args = data; -+ struct panthor_heap_pool *pool; -+ struct panthor_vm *vm; -+ int ret; -+ -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); -+ if (!vm) -+ return -EINVAL; -+ -+ pool = panthor_vm_get_heap_pool(vm, true); -+ if (IS_ERR(pool)) { -+ ret = PTR_ERR(pool); -+ goto out_put_vm; -+ } -+ -+ ret = panthor_heap_create(pool, -+ args->initial_chunk_count, -+ args->chunk_size, -+ args->max_chunks, -+ args->target_in_flight, -+ &args->tiler_heap_ctx_gpu_va, -+ &args->first_heap_chunk_gpu_va); -+ if (ret < 0) -+ goto out_put_heap_pool; -+ -+ /* Heap pools are per-VM. We combine the VM and HEAP id to make -+ * a unique heap handle. 
-+ */ -+ args->handle = (args->vm_id << 16) | ret; -+ ret = 0; -+ -+out_put_heap_pool: -+ panthor_heap_pool_put(pool); -+ -+out_put_vm: -+ panthor_vm_put(vm); -+ return ret; -+} -+ -+static int panthor_ioctl_tiler_heap_destroy(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_tiler_heap_destroy *args = data; -+ struct panthor_heap_pool *pool; -+ struct panthor_vm *vm; -+ int ret; -+ -+ if (args->pad) -+ return -EINVAL; -+ -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->handle >> 16); -+ if (!vm) -+ return -EINVAL; -+ -+ pool = panthor_vm_get_heap_pool(vm, false); -+ if (!pool) { -+ ret = -EINVAL; -+ goto out_put_vm; -+ } -+ -+ ret = panthor_heap_destroy(pool, args->handle & GENMASK(15, 0)); -+ panthor_heap_pool_put(pool); -+ -+out_put_vm: -+ panthor_vm_put(vm); -+ return ret; -+} -+ -+static int panthor_ioctl_vm_bind_async(struct drm_device *ddev, -+ struct drm_panthor_vm_bind *args, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_vm_bind_op *jobs_args; -+ struct panthor_submit_ctx ctx; -+ struct panthor_vm *vm; -+ int ret = 0; -+ -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); -+ if (!vm) -+ return -EINVAL; -+ -+ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); -+ if (ret) -+ goto out_put_vm; -+ -+ ret = panthor_submit_ctx_init(&ctx, file, args->ops.count); -+ if (ret) -+ goto out_free_jobs_args; -+ -+ for (u32 i = 0; i < args->ops.count; i++) { -+ struct drm_panthor_vm_bind_op *op = &jobs_args[i]; -+ struct drm_sched_job *job; -+ -+ job = panthor_vm_bind_job_create(file, vm, op); -+ if (IS_ERR(job)) { -+ ret = PTR_ERR(job); -+ goto out_cleanup_submit_ctx; -+ } -+ -+ ret = panthor_submit_ctx_add_job(&ctx, i, job, &op->syncs); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ } -+ -+ ret = panthor_submit_ctx_collect_jobs_signal_ops(&ctx); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ -+ /* Prepare reservation objects for each VM_BIND job. */ -+ drm_exec_until_all_locked(&ctx.exec) { -+ for (u32 i = 0; i < ctx.job_count; i++) { -+ ret = panthor_vm_bind_job_prepare_resvs(&ctx.exec, ctx.jobs[i].job); -+ drm_exec_retry_on_contention(&ctx.exec); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ } -+ } -+ -+ ret = panthor_submit_ctx_add_deps_and_arm_jobs(&ctx); -+ if (ret) -+ goto out_cleanup_submit_ctx; -+ -+ /* Nothing can fail after that point. */ -+ panthor_submit_ctx_push_jobs(&ctx, panthor_vm_bind_job_update_resvs); -+ -+out_cleanup_submit_ctx: -+ panthor_submit_ctx_cleanup(&ctx, panthor_vm_bind_job_put); -+ -+out_free_jobs_args: -+ kvfree(jobs_args); -+ -+out_put_vm: -+ panthor_vm_put(vm); -+ return ret; -+} -+ -+static int panthor_ioctl_vm_bind_sync(struct drm_device *ddev, -+ struct drm_panthor_vm_bind *args, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_vm_bind_op *jobs_args; -+ struct panthor_vm *vm; -+ int ret; -+ -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); -+ if (!vm) -+ return -EINVAL; -+ -+ ret = PANTHOR_UOBJ_GET_ARRAY(jobs_args, &args->ops); -+ if (ret) -+ goto out_put_vm; -+ -+ for (u32 i = 0; i < args->ops.count; i++) { -+ ret = panthor_vm_bind_exec_sync_op(file, vm, &jobs_args[i]); -+ if (ret) { -+ /* Update ops.count so the user knows where things failed. 
*/ -+ args->ops.count = i; -+ break; -+ } -+ } -+ -+ kvfree(jobs_args); -+ -+out_put_vm: -+ panthor_vm_put(vm); -+ return ret; -+} -+ -+#define PANTHOR_VM_BIND_FLAGS DRM_PANTHOR_VM_BIND_ASYNC -+ -+static int panthor_ioctl_vm_bind(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct drm_panthor_vm_bind *args = data; -+ int cookie, ret; -+ -+ if (!drm_dev_enter(ddev, &cookie)) -+ return -ENODEV; -+ -+ if (args->flags & DRM_PANTHOR_VM_BIND_ASYNC) -+ ret = panthor_ioctl_vm_bind_async(ddev, args, file); -+ else -+ ret = panthor_ioctl_vm_bind_sync(ddev, args, file); -+ -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static int panthor_ioctl_vm_get_state(struct drm_device *ddev, void *data, -+ struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ struct drm_panthor_vm_get_state *args = data; -+ struct panthor_vm *vm; -+ -+ vm = panthor_vm_pool_get_vm(pfile->vms, args->vm_id); -+ if (!vm) -+ return -EINVAL; -+ -+ if (panthor_vm_is_unusable(vm)) -+ args->state = DRM_PANTHOR_VM_STATE_UNUSABLE; -+ else -+ args->state = DRM_PANTHOR_VM_STATE_USABLE; -+ -+ panthor_vm_put(vm); -+ return 0; -+} -+ -+static int -+panthor_open(struct drm_device *ddev, struct drm_file *file) -+{ -+ struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); -+ struct panthor_file *pfile; -+ int ret; -+ -+ if (!try_module_get(THIS_MODULE)) -+ return -EINVAL; -+ -+ pfile = kzalloc(sizeof(*pfile), GFP_KERNEL); -+ if (!pfile) { -+ ret = -ENOMEM; -+ goto err_put_mod; -+ } -+ -+ pfile->ptdev = ptdev; -+ -+ ret = panthor_vm_pool_create(pfile); -+ if (ret) -+ goto err_free_file; -+ -+ ret = panthor_group_pool_create(pfile); -+ if (ret) -+ goto err_destroy_vm_pool; -+ -+ file->driver_priv = pfile; -+ return 0; -+ -+err_destroy_vm_pool: -+ panthor_vm_pool_destroy(pfile); -+ -+err_free_file: -+ kfree(pfile); -+ -+err_put_mod: -+ module_put(THIS_MODULE); -+ return ret; -+} -+ -+static void -+panthor_postclose(struct drm_device *ddev, struct drm_file *file) -+{ -+ struct panthor_file *pfile = file->driver_priv; -+ -+ panthor_group_pool_destroy(pfile); -+ panthor_vm_pool_destroy(pfile); -+ -+ kfree(pfile); -+ module_put(THIS_MODULE); -+} -+ -+static const struct drm_ioctl_desc panthor_drm_driver_ioctls[] = { -+#define PANTHOR_IOCTL(n, func, flags) \ -+ DRM_IOCTL_DEF_DRV(PANTHOR_##n, panthor_ioctl_##func, flags) -+ -+ PANTHOR_IOCTL(DEV_QUERY, dev_query, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(VM_CREATE, vm_create, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(VM_DESTROY, vm_destroy, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(VM_BIND, vm_bind, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(VM_GET_STATE, vm_get_state, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(BO_CREATE, bo_create, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(BO_MMAP_OFFSET, bo_mmap_offset, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(GROUP_CREATE, group_create, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(GROUP_DESTROY, group_destroy, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(GROUP_GET_STATE, group_get_state, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(TILER_HEAP_CREATE, tiler_heap_create, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(TILER_HEAP_DESTROY, tiler_heap_destroy, DRM_RENDER_ALLOW), -+ PANTHOR_IOCTL(GROUP_SUBMIT, group_submit, DRM_RENDER_ALLOW), -+}; -+ -+static int panthor_mmap(struct file *filp, struct vm_area_struct *vma) -+{ -+ struct drm_file *file = filp->private_data; -+ struct panthor_file *pfile = file->driver_priv; -+ struct panthor_device *ptdev = pfile->ptdev; -+ u64 offset = (u64)vma->vm_pgoff << PAGE_SHIFT; -+ int ret, cookie; -+ -+ if 
(!drm_dev_enter(file->minor->dev, &cookie)) -+ return -ENODEV; -+ -+ if (panthor_device_mmio_offset(offset) >= DRM_PANTHOR_USER_MMIO_OFFSET) -+ ret = panthor_device_mmap_io(ptdev, vma); -+ else -+ ret = drm_gem_mmap(filp, vma); -+ -+ drm_dev_exit(cookie); -+ return ret; -+} -+ -+static const struct file_operations panthor_drm_driver_fops = { -+ .open = drm_open, -+ .release = drm_release, -+ .unlocked_ioctl = drm_ioctl, -+ .compat_ioctl = drm_compat_ioctl, -+ .poll = drm_poll, -+ .read = drm_read, -+ .llseek = noop_llseek, -+ .mmap = panthor_mmap, -+}; -+ -+#ifdef CONFIG_DEBUG_FS -+static void panthor_debugfs_init(struct drm_minor *minor) -+{ -+ panthor_mmu_debugfs_init(minor); -+} -+#endif -+ -+/* -+ * PanCSF driver version: -+ * - 1.0 - initial interface -+ */ -+static const struct drm_driver panthor_drm_driver = { -+ .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | -+ DRIVER_SYNCOBJ_TIMELINE | DRIVER_GEM_GPUVA, -+ .open = panthor_open, -+ .postclose = panthor_postclose, -+ .ioctls = panthor_drm_driver_ioctls, -+ .num_ioctls = ARRAY_SIZE(panthor_drm_driver_ioctls), -+ .fops = &panthor_drm_driver_fops, -+ .name = "panthor", -+ .desc = "Panthor DRM driver", -+ .date = "20230801", -+ .major = 1, -+ .minor = 0, -+ -+ .gem_create_object = panthor_gem_create_object, -+ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, -+#ifdef CONFIG_DEBUG_FS -+ .debugfs_init = panthor_debugfs_init, -+#endif -+}; -+ -+static int panthor_probe(struct platform_device *pdev) -+{ -+ struct panthor_device *ptdev; -+ -+ ptdev = devm_drm_dev_alloc(&pdev->dev, &panthor_drm_driver, -+ struct panthor_device, base); -+ if (!ptdev) -+ return -ENOMEM; -+ -+ platform_set_drvdata(pdev, ptdev); -+ -+ return panthor_device_init(ptdev); -+} -+ -+static void panthor_remove(struct platform_device *pdev) -+{ -+ struct panthor_device *ptdev = platform_get_drvdata(pdev); -+ -+ panthor_device_unplug(ptdev); -+} -+ -+static const struct of_device_id dt_match[] = { -+ { .compatible = "rockchip,rk3588-mali" }, -+ { .compatible = "arm,mali-valhall-csf" }, -+ {} -+}; -+MODULE_DEVICE_TABLE(of, dt_match); -+ -+static DEFINE_RUNTIME_DEV_PM_OPS(panthor_pm_ops, -+ panthor_device_suspend, -+ panthor_device_resume, -+ NULL); -+ -+static struct platform_driver panthor_driver = { -+ .probe = panthor_probe, -+ .remove_new = panthor_remove, -+ .driver = { -+ .name = "panthor", -+ .pm = &panthor_pm_ops, -+ .of_match_table = dt_match, -+ }, -+}; -+ -+/* -+ * Workqueue used to cleanup stuff. -+ * -+ * We create a dedicated workqueue so we can drain on unplug and -+ * make sure all resources are freed before the module is unloaded. 
-+ */ -+struct workqueue_struct *panthor_cleanup_wq; -+ -+static int __init panthor_init(void) -+{ -+ int ret; -+ -+ ret = panthor_mmu_pt_cache_init(); -+ if (ret) -+ return ret; -+ -+ panthor_cleanup_wq = alloc_workqueue("panthor-cleanup", WQ_UNBOUND, 0); -+ if (!panthor_cleanup_wq) { -+ pr_err("panthor: Failed to allocate the workqueues"); -+ ret = -ENOMEM; -+ goto err_mmu_pt_cache_fini; -+ } -+ -+ ret = platform_driver_register(&panthor_driver); -+ if (ret) -+ goto err_destroy_cleanup_wq; -+ -+ return 0; -+ -+err_destroy_cleanup_wq: -+ destroy_workqueue(panthor_cleanup_wq); -+ -+err_mmu_pt_cache_fini: -+ panthor_mmu_pt_cache_fini(); -+ return ret; -+} -+module_init(panthor_init); -+ -+static void __exit panthor_exit(void) -+{ -+ platform_driver_unregister(&panthor_driver); -+ destroy_workqueue(panthor_cleanup_wq); -+ panthor_mmu_pt_cache_fini(); -+} -+module_exit(panthor_exit); -+ -+MODULE_AUTHOR("Panthor Project Developers"); -+MODULE_DESCRIPTION("Panthor DRM Driver"); -+MODULE_LICENSE("Dual MIT/GPL"); --- -2.42.0 - - -From 44b0849ed37131c13a8185fbf652ebbf558eee68 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:43 +0100 -Subject: [PATCH 62/81] drm/panthor: Allow driver compilation - -Now that all blocks are available, we can add/update Kconfig/Makefile -files to allow compilation. - -v4: -- Add Steve's R-b - -v3: -- Add a dep on DRM_GPUVM -- Fix dependencies in Kconfig -- Expand help text to (hopefully) describe which GPUs are to be - supported by this driver and which are for panfrost. - -Co-developed-by: Steven Price -Signed-off-by: Steven Price -Signed-off-by: Boris Brezillon -Acked-by: Steven Price # MIT+GPL2 relicensing,Arm -Acked-by: Grant Likely # MIT+GPL2 relicensing,Linaro -Acked-by: Boris Brezillon # MIT+GPL2 relicensing,Collabora -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-13-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - drivers/gpu/drm/Kconfig | 2 ++ - drivers/gpu/drm/Makefile | 1 + - drivers/gpu/drm/panthor/Kconfig | 23 +++++++++++++++++++++++ - drivers/gpu/drm/panthor/Makefile | 15 +++++++++++++++ - 4 files changed, 41 insertions(+) - create mode 100644 drivers/gpu/drm/panthor/Kconfig - create mode 100644 drivers/gpu/drm/panthor/Makefile - -diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig -index 2520db0b776e..f4ab4b9b9d7f 100644 ---- a/drivers/gpu/drm/Kconfig -+++ b/drivers/gpu/drm/Kconfig -@@ -383,6 +383,8 @@ source "drivers/gpu/drm/lima/Kconfig" - - source "drivers/gpu/drm/panfrost/Kconfig" - -+source "drivers/gpu/drm/panthor/Kconfig" -+ - source "drivers/gpu/drm/aspeed/Kconfig" - - source "drivers/gpu/drm/mcde/Kconfig" -diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile -index 104b42df2e95..6eb2b553a163 100644 ---- a/drivers/gpu/drm/Makefile -+++ b/drivers/gpu/drm/Makefile -@@ -179,6 +179,7 @@ obj-$(CONFIG_DRM_XEN) += xen/ - obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/ - obj-$(CONFIG_DRM_LIMA) += lima/ - obj-$(CONFIG_DRM_PANFROST) += panfrost/ -+obj-$(CONFIG_DRM_PANTHOR) += panthor/ - obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/ - obj-$(CONFIG_DRM_MCDE) += mcde/ - obj-$(CONFIG_DRM_TIDSS) += tidss/ -diff --git a/drivers/gpu/drm/panthor/Kconfig b/drivers/gpu/drm/panthor/Kconfig -new file mode 100644 -index 000000000000..159845e51116 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/Kconfig -@@ -0,0 +1,23 @@ -+# SPDX-License-Identifier: GPL-2.0 or MIT -+ -+config DRM_PANTHOR -+ tristate "Panthor (DRM support for ARM Mali CSF-based GPUs)" -+ depends on DRM -+ 
depends on ARM || ARM64 || COMPILE_TEST -+ depends on !GENERIC_ATOMIC64 # for IOMMU_IO_PGTABLE_LPAE -+ depends on MMU -+ select DRM_GPUVM -+ select DRM_EXEC -+ select DRM_SCHED -+ select IOMMU_SUPPORT -+ select IOMMU_IO_PGTABLE_LPAE -+ select DRM_GEM_SHMEM_HELPER -+ select PM_DEVFREQ -+ select DEVFREQ_GOV_SIMPLE_ONDEMAND -+ help -+ DRM driver for ARM Mali CSF-based GPUs. -+ -+ This driver is for Mali (or Immortalis) Valhall Gxxx GPUs. -+ -+ Note that the Mali-G68 and Mali-G78, while Valhall architecture, will -+ be supported with the panfrost driver as they are not CSF GPUs. -diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile -new file mode 100644 -index 000000000000..64193a484879 ---- /dev/null -+++ b/drivers/gpu/drm/panthor/Makefile -@@ -0,0 +1,15 @@ -+# SPDX-License-Identifier: GPL-2.0 or MIT -+ -+panthor-y := \ -+ panthor_devfreq.o \ -+ panthor_device.o \ -+ panthor_drv.o \ -+ panthor_gem.o \ -+ panthor_gpu.o \ -+ panthor_heap.o \ -+ panthor_heap.o \ -+ panthor_fw.o \ -+ panthor_mmu.o \ -+ panthor_sched.o -+ -+obj-$(CONFIG_DRM_PANTHOR) += panthor.o --- -2.42.0 - - -From e9aa047b2f352493966f663488eeb995af58908f Mon Sep 17 00:00:00 2001 -From: Liviu Dudau -Date: Mon, 22 Jan 2024 17:30:44 +0100 -Subject: [PATCH 63/81] dt-bindings: gpu: mali-valhall-csf: Add support for Arm - Mali CSF GPUs - -Arm has introduced a new v10 GPU architecture that replaces the Job Manager -interface with a new Command Stream Frontend. It adds firmware driven -command stream queues that can be used by kernel and user space to submit -jobs to the GPU. - -Add the initial schema for the device tree that is based on support for -RK3588 SoC. The minimum number of clocks is one for the IP, but on Rockchip -platforms they will tend to expose the semi-independent clocks for better -power management. - -v4: -- Fix formatting issue - -v3: -- Cleanup commit message to remove redundant text -- Added opp-table property and re-ordered entries -- Clarified power-domains and power-domain-names requirements for RK3588. -- Cleaned up example - -Note: power-domains and power-domain-names requirements for other platforms -are still work in progress, hence the bindings are left incomplete here. 
- -v2: -- New commit - -Signed-off-by: Liviu Dudau -Cc: Krzysztof Kozlowski -Cc: Rob Herring -Cc: Conor Dooley -Cc: -Signed-off-by: Boris Brezillon -Link: https://lore.kernel.org/r/20240122163047.1954733-14-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - .../bindings/gpu/arm,mali-valhall-csf.yaml | 147 ++++++++++++++++++ - 1 file changed, 147 insertions(+) - create mode 100644 Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml - -diff --git a/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml -new file mode 100644 -index 000000000000..be1f6bacc3f3 ---- /dev/null -+++ b/Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml -@@ -0,0 +1,147 @@ -+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause -+%YAML 1.2 -+--- -+$id: http://devicetree.org/schemas/gpu/arm,mali-valhall-csf.yaml# -+$schema: http://devicetree.org/meta-schemas/core.yaml# -+ -+title: ARM Mali Valhall GPU -+ -+maintainers: -+ - Liviu Dudau -+ - Boris Brezillon -+ -+properties: -+ $nodename: -+ pattern: '^gpu@[a-f0-9]+$' -+ -+ compatible: -+ oneOf: -+ - items: -+ - enum: -+ - rockchip,rk3588-mali -+ - const: arm,mali-valhall-csf # Mali Valhall GPU model/revision is fully discoverable -+ -+ reg: -+ maxItems: 1 -+ -+ interrupts: -+ items: -+ - description: Job interrupt -+ - description: MMU interrupt -+ - description: GPU interrupt -+ -+ interrupt-names: -+ items: -+ - const: job -+ - const: mmu -+ - const: gpu -+ -+ clocks: -+ minItems: 1 -+ maxItems: 3 -+ -+ clock-names: -+ minItems: 1 -+ items: -+ - const: core -+ - const: coregroup -+ - const: stacks -+ -+ mali-supply: true -+ -+ operating-points-v2: true -+ opp-table: -+ type: object -+ -+ power-domains: -+ minItems: 1 -+ maxItems: 5 -+ -+ power-domain-names: -+ minItems: 1 -+ maxItems: 5 -+ -+ sram-supply: true -+ -+ "#cooling-cells": -+ const: 2 -+ -+ dynamic-power-coefficient: -+ $ref: /schemas/types.yaml#/definitions/uint32 -+ description: -+ A u32 value that represents the running time dynamic -+ power coefficient in units of uW/MHz/V^2. The -+ coefficient can either be calculated from power -+ measurements or derived by analysis. -+ -+ The dynamic power consumption of the GPU is -+ proportional to the square of the Voltage (V) and -+ the clock frequency (f). The coefficient is used to -+ calculate the dynamic power as below - -+ -+ Pdyn = dynamic-power-coefficient * V^2 * f -+ -+ where voltage is in V, frequency is in MHz. 
-+ -+ dma-coherent: true -+ -+required: -+ - compatible -+ - reg -+ - interrupts -+ - interrupt-names -+ - clocks -+ - mali-supply -+ -+additionalProperties: false -+ -+allOf: -+ - if: -+ properties: -+ compatible: -+ contains: -+ const: rockchip,rk3588-mali -+ then: -+ properties: -+ clocks: -+ minItems: 3 -+ power-domains: -+ maxItems: 1 -+ power-domain-names: false -+ -+examples: -+ - | -+ #include -+ #include -+ #include -+ #include -+ -+ gpu: gpu@fb000000 { -+ compatible = "rockchip,rk3588-mali", "arm,mali-valhall-csf"; -+ reg = <0xfb000000 0x200000>; -+ interrupts = , -+ , -+ ; -+ interrupt-names = "job", "mmu", "gpu"; -+ clock-names = "core", "coregroup", "stacks"; -+ clocks = <&cru CLK_GPU>, <&cru CLK_GPU_COREGROUP>, -+ <&cru CLK_GPU_STACKS>; -+ power-domains = <&power RK3588_PD_GPU>; -+ operating-points-v2 = <&gpu_opp_table>; -+ mali-supply = <&vdd_gpu_s0>; -+ sram-supply = <&vdd_gpu_mem_s0>; -+ }; -+ -+ gpu_opp_table: opp-table { -+ compatible = "operating-points-v2"; -+ opp-300000000 { -+ opp-hz = /bits/ 64 <300000000>; -+ opp-microvolt = <675000 675000 850000>; -+ }; -+ opp-400000000 { -+ opp-hz = /bits/ 64 <400000000>; -+ opp-microvolt = <675000 675000 850000>; -+ }; -+ }; -+ -+... --- -2.42.0 - - -From a27ed1f881983819161de9190a02bdfe22dd1833 Mon Sep 17 00:00:00 2001 -From: Boris Brezillon -Date: Mon, 22 Jan 2024 17:30:45 +0100 -Subject: [PATCH 64/81] drm/panthor: Add an entry to MAINTAINERS - -Add an entry for the Panthor driver to the MAINTAINERS file. - -v4: -- Add Steve's R-b - -v3: -- Add bindings document as an 'F:' line. -- Add Steven and Liviu as co-maintainers. - -Signed-off-by: Boris Brezillon -Reviewed-by: Steven Price -Link: https://lore.kernel.org/r/20240122163047.1954733-15-boris.brezillon@collabora.com -Signed-off-by: Sebastian Reichel ---- - MAINTAINERS | 11 +++++++++++ - 1 file changed, 11 insertions(+) - -diff --git a/MAINTAINERS b/MAINTAINERS -index 8d1052fa6a69..81bda84e2b7c 100644 ---- a/MAINTAINERS -+++ b/MAINTAINERS -@@ -1668,6 +1668,17 @@ F: Documentation/gpu/panfrost.rst - F: drivers/gpu/drm/panfrost/ - F: include/uapi/drm/panfrost_drm.h - -+ARM MALI PANTHOR DRM DRIVER -+M: Boris Brezillon -+M: Steven Price -+M: Liviu Dudau -+L: dri-devel@lists.freedesktop.org -+S: Supported -+T: git git://anongit.freedesktop.org/drm/drm-misc -+F: Documentation/devicetree/bindings/gpu/arm,mali-valhall-csf.yaml -+F: drivers/gpu/drm/panthor/ -+F: include/uapi/drm/panthor_drm.h -+ - ARM MALI-DP DRM DRIVER - M: Liviu Dudau - S: Supported --- -2.42.0 - - -From 1a9416cf366528cbe6c02960d472a3de33424905 Mon Sep 17 00:00:00 2001 +From 824dcd9d4a60a9f688b68c38cc8c7a3cdad8518f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Thu, 25 Jan 2024 14:46:53 +0100 -Subject: [PATCH 65/81] arm64: defconfig: support Mali CSF-based GPUs +Subject: [PATCH 62/71] arm64: defconfig: support Mali CSF-based GPUs Enable support for Mali CSF-based GPUs, which is found on recent ARM SoCs, such as Rockchip or Mediatek. @@ -30861,10 +29855,10 @@ index ab24a68ebada..16ab18539eac 100644 2.42.0 -From e565de0048e35a7900a50eca540b6bd60a020355 Mon Sep 17 00:00:00 2001 +From 4ada2382b0aa09c0c01846b98629a5889025cdf0 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 28 Jun 2023 11:15:16 +0200 -Subject: [PATCH 66/81] soc/rockchip: Add a regulator coupler for the Mali GPU +Subject: [PATCH 63/71] soc/rockchip: Add a regulator coupler for the Mali GPU on rk3588 G610 Mali normally takes 2 regulators, but the devfreq implementation @@ -30872,6 +29866,7 @@ can only deal with one. 
Let's add a regulator coupler as done for mtk8183. Signed-off-by: Boris Brezillon +[do s/Mediatek/Rockchip and rename mrc to rrc] Signed-off-by: Sebastian Reichel --- drivers/soc/rockchip/Kconfig | 5 + @@ -30881,7 +29876,7 @@ Signed-off-by: Sebastian Reichel create mode 100644 drivers/soc/rockchip/rockchip-regulator-coupler.c diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig -index 785f60c6f3ad..20153b813c03 100644 +index 785f60c6f3ad..d9a692985de7 100644 --- a/drivers/soc/rockchip/Kconfig +++ b/drivers/soc/rockchip/Kconfig @@ -22,6 +22,11 @@ config ROCKCHIP_IODOMAIN @@ -30889,7 +29884,7 @@ index 785f60c6f3ad..20153b813c03 100644 voltage supplied by the regulators. +config ROCKCHIP_REGULATOR_COUPLER -+ bool "MediaTek SoC Regulator Coupler" if COMPILE_TEST ++ bool "Rockchip SoC Regulator Coupler" if COMPILE_TEST + default ARCH_ROCKCHIP + depends on REGULATOR + @@ -30908,7 +29903,7 @@ index 23d414433c8c..ef7c1e03d7d0 100644 obj-$(CONFIG_ROCKCHIP_DTPM) += dtpm.o diff --git a/drivers/soc/rockchip/rockchip-regulator-coupler.c b/drivers/soc/rockchip/rockchip-regulator-coupler.c new file mode 100644 -index 000000000000..18e84733a0df +index 000000000000..a285595e926d --- /dev/null +++ b/drivers/soc/rockchip/rockchip-regulator-coupler.c @@ -0,0 +1,158 @@ @@ -30948,10 +29943,10 @@ index 000000000000..18e84733a0df + struct regulator_dev *rdev, + suspend_state_t state) +{ -+ struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); ++ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); + int max_spread = rdev->constraints->max_spread[0]; -+ int vsram_min_uV = mrc->vsram_rdev->constraints->min_uV; -+ int vsram_max_uV = mrc->vsram_rdev->constraints->max_uV; ++ int vsram_min_uV = rrc->vsram_rdev->constraints->min_uV; ++ int vsram_max_uV = rrc->vsram_rdev->constraints->max_uV; + int vsram_target_min_uV, vsram_target_max_uV; + int min_uV = 0; + int max_uV = INT_MAX; @@ -30966,7 +29961,7 @@ index 000000000000..18e84733a0df + * implies that the target device has yet to perform initialization + * and setting a voltage at that time is harmless. 
+ */ -+ if (rdev == mrc->vsram_rdev) { ++ if (rdev == rrc->vsram_rdev) { + if (rdev->use_count == 0) + return regulator_do_balance_voltage(rdev, state, true); + @@ -31001,9 +29996,9 @@ index 000000000000..18e84733a0df + + pr_debug("Setting voltage %d-%duV on %s (minuV %d)\n", + vsram_target_min_uV, vsram_target_max_uV, -+ rdev_get_name(mrc->vsram_rdev), min_uV); ++ rdev_get_name(rrc->vsram_rdev), min_uV); + -+ ret = regulator_set_voltage_rdev(mrc->vsram_rdev, vsram_target_min_uV, ++ ret = regulator_set_voltage_rdev(rrc->vsram_rdev, vsram_target_min_uV, + vsram_target_max_uV, state); + if (ret) + return ret; @@ -31015,7 +30010,7 @@ index 000000000000..18e84733a0df +static int rockchip_regulator_attach(struct regulator_coupler *coupler, + struct regulator_dev *rdev) +{ -+ struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); ++ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); + const char *rdev_name = rdev_get_name(rdev); + + /* @@ -31029,9 +30024,9 @@ index 000000000000..18e84733a0df + return 1; + + if (strstr(rdev_name, "gpu_mem")) { -+ if (mrc->vsram_rdev) ++ if (rrc->vsram_rdev) + return -EINVAL; -+ mrc->vsram_rdev = rdev; ++ rrc->vsram_rdev = rdev; + } else if (!strstr(rdev_name, "gpu")) { + return 1; + } @@ -31042,10 +30037,10 @@ index 000000000000..18e84733a0df +static int rockchip_regulator_detach(struct regulator_coupler *coupler, + struct regulator_dev *rdev) +{ -+ struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); ++ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); + -+ if (rdev == mrc->vsram_rdev) -+ mrc->vsram_rdev = NULL; ++ if (rdev == rrc->vsram_rdev) ++ rrc->vsram_rdev = NULL; + + return 0; +} @@ -31074,10 +30069,10 @@ index 000000000000..18e84733a0df 2.42.0 -From 83964313343e80f582bdd69f5e4fa13738620a9c Mon Sep 17 00:00:00 2001 +From 3467599a86c3a4ec64b2100fa3a889004b685b7f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 7 Aug 2023 17:30:58 +0200 -Subject: [PATCH 67/81] arm64: dts: rockchip: rk3588: Add GPU nodes +Subject: [PATCH 64/71] arm64: dts: rockchip: rk3588: Add GPU nodes Signed-off-by: Sebastian Reichel --- @@ -31085,10 +30080,10 @@ Signed-off-by: Sebastian Reichel 1 file changed, 119 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 110bf1cec8b6..507e5c6f8bd0 100644 +index bd3e2b03385c..1b281dc677a4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -962,6 +962,120 @@ usb_host2_xhci: usb@fcd00000 { +@@ -904,6 +904,120 @@ usb_host2_xhci: usb@fcd00000 { snps,dis-del-phy-power-chg-quirk; snps,dis-tx-ipgap-linecheck-quirk; snps,dis_rxdet_inp3_quirk; @@ -31209,7 +30204,7 @@ index 110bf1cec8b6..507e5c6f8bd0 100644 status = "disabled"; }; -@@ -3081,6 +3195,11 @@ gpio4: gpio@fec50000 { +@@ -3023,6 +3137,11 @@ gpio4: gpio@fec50000 { }; }; @@ -31225,10 +30220,10 @@ index 110bf1cec8b6..507e5c6f8bd0 100644 2.42.0 -From 6badfdb6f15cd51051e974697eab682a68ebd8aa Mon Sep 17 00:00:00 2001 +From 6a37c358e52ff17e0a42fee070f40e2f112e3904 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 8 Aug 2023 12:05:22 +0200 -Subject: [PATCH 68/81] arm64: dts: rockchip: rk3588-rock5b: Add GPU node +Subject: [PATCH 65/71] arm64: dts: rockchip: rk3588-rock5b: Add GPU node Signed-off-by: Boris Brezillon Signed-off-by: Sebastian Reichel @@ -31237,11 +30232,11 @@ Signed-off-by: Sebastian Reichel 1 file changed, 6 insertions(+) diff --git 
a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index c1fce00c9c18..6b4a699dea3d 100644 +index d1e78da13709..d49ce332995f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -@@ -193,6 +193,11 @@ &cpu_l3 { - mem-supply = <&vdd_cpu_lit_mem_s0>; +@@ -185,6 +185,11 @@ &cpu_l3 { + cpu-supply = <&vdd_cpu_lit_s0>; }; +&gpu { @@ -31252,7 +30247,7 @@ index c1fce00c9c18..6b4a699dea3d 100644 &display_subsystem { clocks = <&hdptxphy_hdmi0>; clock-names = "hdmi0_phy_pll"; -@@ -562,6 +567,7 @@ rk806_dvs3_null: dvs3-null-pins { +@@ -554,6 +559,7 @@ rk806_dvs3_null: dvs3-null-pins { regulators { vdd_gpu_s0: vdd_gpu_mem_s0: dcdc-reg1 { @@ -31264,10 +30259,10 @@ index c1fce00c9c18..6b4a699dea3d 100644 2.42.0 -From 1000a66f4cd7a4863fde689c398d1c977793f1ef Mon Sep 17 00:00:00 2001 +From 0dec1b3e93c0b25e5ad61e48be35455fb5faca7e Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 7 Aug 2023 17:36:22 +0200 -Subject: [PATCH 69/81] arm64: dts: rockchip: rk3588-evb1: Enable GPU +Subject: [PATCH 66/71] arm64: dts: rockchip: rk3588-evb1: Enable GPU Signed-off-by: Boris Brezillon Signed-off-by: Sebastian Reichel @@ -31276,10 +30271,10 @@ Signed-off-by: Sebastian Reichel 1 file changed, 11 insertions(+) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 4fac5f52f99d..3b01cad72892 100644 +index d4be4d01874d..60dd26f32b84 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -364,6 +364,12 @@ &hdptxphy_hdmi0 { +@@ -356,6 +356,12 @@ &hdptxphy_hdmi0 { status = "okay"; }; @@ -31292,7 +30287,7 @@ index 4fac5f52f99d..3b01cad72892 100644 &i2c2 { status = "okay"; -@@ -669,12 +675,15 @@ rk806_dvs3_null: dvs3-null-pins { +@@ -661,12 +667,15 @@ rk806_dvs3_null: dvs3-null-pins { regulators { vdd_gpu_s0: dcdc-reg1 { @@ -31308,7 +30303,7 @@ index 4fac5f52f99d..3b01cad72892 100644 regulator-state-mem { regulator-off-in-suspend; }; -@@ -725,6 +734,8 @@ vdd_gpu_mem_s0: dcdc-reg5 { +@@ -717,6 +726,8 @@ vdd_gpu_mem_s0: dcdc-reg5 { regulator-ramp-delay = <12500>; regulator-enable-ramp-delay = <400>; regulator-name = "vdd_gpu_mem_s0"; @@ -31321,115 +30316,10 @@ index 4fac5f52f99d..3b01cad72892 100644 2.42.0 -From 3813c9acf0f54ca7308a4057e17f29ffe38a2a0c Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Tue, 30 Jan 2024 18:09:05 +0100 -Subject: [PATCH 70/81] soc/rockchip: remove remaining Mediatek traces - -Fix helper text in Kconfig and rename mrc (mediatek regulator coupler) -to rrc (rockchip regulator coupler). - -Signed-off-by: Sebastian Reichel ---- - drivers/soc/rockchip/Kconfig | 2 +- - .../soc/rockchip/rockchip-regulator-coupler.c | 24 +++++++++---------- - 2 files changed, 13 insertions(+), 13 deletions(-) - -diff --git a/drivers/soc/rockchip/Kconfig b/drivers/soc/rockchip/Kconfig -index 20153b813c03..d9a692985de7 100644 ---- a/drivers/soc/rockchip/Kconfig -+++ b/drivers/soc/rockchip/Kconfig -@@ -23,7 +23,7 @@ config ROCKCHIP_IODOMAIN - voltage supplied by the regulators. 
- - config ROCKCHIP_REGULATOR_COUPLER -- bool "MediaTek SoC Regulator Coupler" if COMPILE_TEST -+ bool "Rockchip SoC Regulator Coupler" if COMPILE_TEST - default ARCH_ROCKCHIP - depends on REGULATOR - -diff --git a/drivers/soc/rockchip/rockchip-regulator-coupler.c b/drivers/soc/rockchip/rockchip-regulator-coupler.c -index 18e84733a0df..a285595e926d 100644 ---- a/drivers/soc/rockchip/rockchip-regulator-coupler.c -+++ b/drivers/soc/rockchip/rockchip-regulator-coupler.c -@@ -34,10 +34,10 @@ static int rockchip_regulator_balance_voltage(struct regulator_coupler *coupler, - struct regulator_dev *rdev, - suspend_state_t state) - { -- struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); -+ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); - int max_spread = rdev->constraints->max_spread[0]; -- int vsram_min_uV = mrc->vsram_rdev->constraints->min_uV; -- int vsram_max_uV = mrc->vsram_rdev->constraints->max_uV; -+ int vsram_min_uV = rrc->vsram_rdev->constraints->min_uV; -+ int vsram_max_uV = rrc->vsram_rdev->constraints->max_uV; - int vsram_target_min_uV, vsram_target_max_uV; - int min_uV = 0; - int max_uV = INT_MAX; -@@ -52,7 +52,7 @@ static int rockchip_regulator_balance_voltage(struct regulator_coupler *coupler, - * implies that the target device has yet to perform initialization - * and setting a voltage at that time is harmless. - */ -- if (rdev == mrc->vsram_rdev) { -+ if (rdev == rrc->vsram_rdev) { - if (rdev->use_count == 0) - return regulator_do_balance_voltage(rdev, state, true); - -@@ -87,9 +87,9 @@ static int rockchip_regulator_balance_voltage(struct regulator_coupler *coupler, - - pr_debug("Setting voltage %d-%duV on %s (minuV %d)\n", - vsram_target_min_uV, vsram_target_max_uV, -- rdev_get_name(mrc->vsram_rdev), min_uV); -+ rdev_get_name(rrc->vsram_rdev), min_uV); - -- ret = regulator_set_voltage_rdev(mrc->vsram_rdev, vsram_target_min_uV, -+ ret = regulator_set_voltage_rdev(rrc->vsram_rdev, vsram_target_min_uV, - vsram_target_max_uV, state); - if (ret) - return ret; -@@ -101,7 +101,7 @@ static int rockchip_regulator_balance_voltage(struct regulator_coupler *coupler, - static int rockchip_regulator_attach(struct regulator_coupler *coupler, - struct regulator_dev *rdev) - { -- struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); -+ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); - const char *rdev_name = rdev_get_name(rdev); - - /* -@@ -115,9 +115,9 @@ static int rockchip_regulator_attach(struct regulator_coupler *coupler, - return 1; - - if (strstr(rdev_name, "gpu_mem")) { -- if (mrc->vsram_rdev) -+ if (rrc->vsram_rdev) - return -EINVAL; -- mrc->vsram_rdev = rdev; -+ rrc->vsram_rdev = rdev; - } else if (!strstr(rdev_name, "gpu")) { - return 1; - } -@@ -128,10 +128,10 @@ static int rockchip_regulator_attach(struct regulator_coupler *coupler, - static int rockchip_regulator_detach(struct regulator_coupler *coupler, - struct regulator_dev *rdev) - { -- struct rockchip_regulator_coupler *mrc = to_rockchip_coupler(coupler); -+ struct rockchip_regulator_coupler *rrc = to_rockchip_coupler(coupler); - -- if (rdev == mrc->vsram_rdev) -- mrc->vsram_rdev = NULL; -+ if (rdev == rrc->vsram_rdev) -+ rrc->vsram_rdev = NULL; - - return 0; - } --- -2.42.0 - - -From d80858ed3fd9b4b68b14008a1883930afd9219db Mon Sep 17 00:00:00 2001 +From bfa04cad3ea931be0a306b343f9bbd989a370f61 Mon Sep 17 00:00:00 2001 From: "Carsten Haitzler (Rasterman)" Date: Tue, 6 Feb 2024 10:12:54 +0000 -Subject: [PATCH 71/81] arm64: dts: rockchip: Slow 
down EMMC a bit to keep IO +Subject: [PATCH 67/71] arm64: dts: rockchip: Slow down EMMC a bit to keep IO stable This drops to hs200 mode and 150Mhz as this is actually stable across @@ -31444,10 +30334,10 @@ Signed-off-by: Sebastian Reichel 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index 6b4a699dea3d..a41473075bef 100644 +index d49ce332995f..7031360187a4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -@@ -462,8 +462,8 @@ &sdhci { +@@ -454,8 +454,8 @@ &sdhci { no-sdio; no-sd; non-removable; @@ -31462,353 +30352,10 @@ index 6b4a699dea3d..a41473075bef 100644 2.42.0 -From 4b58f4f8c7f67fdc90087be78471bf257d524c8d Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 31 Jan 2024 18:15:50 +0100 -Subject: [PATCH 72/81] arm64: dts: rockchip: rk3588-evb1: Couple CPU - regulators - -The RK3588 CPUs have two supply inputs: one supply for the logic and one -for the memory interface. On many platforms both supplies are handled by -the same regulator. - -Boards, which have separate regulators for each supply need them coupled -together. This is necessary when cpufreq support is added to avoid crashes. - -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -index 3b01cad72892..5b4052d07268 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588-evb1-v10.dts -@@ -988,6 +988,8 @@ vdd_cpu_big1_s0: dcdc-reg1 { - regulator-max-microvolt = <1050000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_big1_s0"; -+ regulator-coupled-with = <&vdd_cpu_big1_mem_s0>; -+ regulator-coupled-max-spread = <10000>; - regulator-state-mem { - regulator-off-in-suspend; - }; -@@ -1000,6 +1002,8 @@ vdd_cpu_big0_s0: dcdc-reg2 { - regulator-max-microvolt = <1050000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_big0_s0"; -+ regulator-coupled-with = <&vdd_cpu_big0_mem_s0>; -+ regulator-coupled-max-spread = <10000>; - regulator-state-mem { - regulator-off-in-suspend; - }; -@@ -1012,6 +1016,8 @@ vdd_cpu_lit_s0: dcdc-reg3 { - regulator-max-microvolt = <950000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_lit_s0"; -+ regulator-coupled-with = <&vdd_cpu_lit_mem_s0>; -+ regulator-coupled-max-spread = <10000>; - regulator-state-mem { - regulator-off-in-suspend; - }; -@@ -1036,6 +1042,8 @@ vdd_cpu_big1_mem_s0: dcdc-reg5 { - regulator-max-microvolt = <1050000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_big1_mem_s0"; -+ regulator-coupled-with = <&vdd_cpu_big1_s0>; -+ regulator-coupled-max-spread = <10000>; - regulator-state-mem { - regulator-off-in-suspend; - }; -@@ -1049,6 +1057,8 @@ vdd_cpu_big0_mem_s0: dcdc-reg6 { - regulator-max-microvolt = <1050000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_big0_mem_s0"; -+ regulator-coupled-with = <&vdd_cpu_big0_s0>; -+ regulator-coupled-max-spread = <10000>; - regulator-state-mem { - regulator-off-in-suspend; - }; -@@ -1073,6 +1083,8 @@ vdd_cpu_lit_mem_s0: dcdc-reg8 { - regulator-max-microvolt = <950000>; - regulator-ramp-delay = <12500>; - regulator-name = "vdd_cpu_lit_mem_s0"; -+ regulator-coupled-with = <&vdd_cpu_lit_s0>; -+ regulator-coupled-max-spread = <10000>; - 
regulator-state-mem { - regulator-off-in-suspend; - }; --- -2.42.0 - - -From 6b866890d3956ad4d2631329cdb0fb3413b75781 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 31 Jan 2024 19:26:08 +0100 -Subject: [PATCH 73/81] arm64: dts: rockchip: rk3588: simplify opp nodes for - the cpu - -With the coupler regulator setup, it is possible to use the -generic cpufreq driver. This requires dropping the extra -regulator information, which is now handled by the coupler. - -Signed-off-by: Sebastian Reichel ---- - arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 102 +++++----------------- - 1 file changed, 22 insertions(+), 80 deletions(-) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -index 507e5c6f8bd0..1b281dc677a4 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -+++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi -@@ -57,51 +57,43 @@ cluster0_opp_table: opp-table-cluster0 { - - opp-408000000 { - opp-hz = /bits/ 64 <408000000>; -- opp-microvolt = <750000 750000 950000>, -- <750000 750000 950000>; -+ opp-microvolt = <750000 750000 950000>; - clock-latency-ns = <40000>; - opp-suspend; - }; - opp-600000000 { - opp-hz = /bits/ 64 <600000000>; -- opp-microvolt = <750000 750000 950000>, -- <750000 750000 950000>; -+ opp-microvolt = <750000 750000 950000>; - clock-latency-ns = <40000>; - }; - opp-816000000 { - opp-hz = /bits/ 64 <816000000>; -- opp-microvolt = <750000 750000 950000>, -- <750000 750000 950000>; -+ opp-microvolt = <750000 750000 950000>; - clock-latency-ns = <40000>; - }; - opp-1008000000 { - opp-hz = /bits/ 64 <1008000000>; -- opp-microvolt = <750000 750000 950000>, -- <750000 750000 950000>; -+ opp-microvolt = <750000 750000 950000>; - clock-latency-ns = <40000>; - }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; -- opp-microvolt = <775000 775000 950000>, -- <775000 775000 950000>; -+ opp-microvolt = <775000 775000 950000>; - clock-latency-ns = <40000>; - }; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; -- opp-microvolt = <825000 825000 950000>, -- <825000 825000 950000>; -+ opp-microvolt = <825000 825000 950000>; - clock-latency-ns = <40000>; - }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; -- opp-microvolt = <875000 875000 950000>, -- <875000 875000 950000>; -+ opp-microvolt = <875000 875000 950000>; - clock-latency-ns = <40000>; - }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; -- opp-microvolt = <950000 950000 950000>, -- <950000 950000 950000>; -+ opp-microvolt = <950000 950000 950000>; - clock-latency-ns = <40000>; - }; - }; -@@ -122,63 +114,38 @@ cluster1_opp_table: opp-table-cluster1 { - - opp-408000000 { - opp-hz = /bits/ 64 <408000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - opp-suspend; - }; -- opp-600000000 { -- opp-hz = /bits/ 64 <600000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; -- opp-816000000 { -- opp-hz = /bits/ 64 <816000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; -- opp-1008000000 { -- opp-hz = /bits/ 64 <1008000000>; -- opp-microvolt = <625000 625000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; -- opp-microvolt = <650000 650000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - 
}; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; -- opp-microvolt = <675000 675000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; -- opp-microvolt = <700000 700000 1000000>, -- <700000 700000 1000000>; -+ opp-microvolt = <700000 700000 1000000>; - clock-latency-ns = <40000>; - }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; -- opp-microvolt = <775000 775000 1000000>, -- <775000 775000 1000000>; -+ opp-microvolt = <775000 775000 1000000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; -- opp-microvolt = <850000 850000 1000000>, -- <850000 850000 1000000>; -+ opp-microvolt = <850000 850000 1000000>; - clock-latency-ns = <40000>; - }; - opp-2208000000 { - opp-hz = /bits/ 64 <2208000000>; -- opp-microvolt = <925000 925000 1000000>, -- <925000 925000 1000000>; -+ opp-microvolt = <925000 925000 1000000>; - clock-latency-ns = <40000>; - }; - }; -@@ -199,63 +166,38 @@ cluster2_opp_table: opp-table-cluster2 { - - opp-408000000 { - opp-hz = /bits/ 64 <408000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - opp-suspend; - }; -- opp-600000000 { -- opp-hz = /bits/ 64 <600000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; -- opp-816000000 { -- opp-hz = /bits/ 64 <816000000>; -- opp-microvolt = <600000 600000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; -- opp-1008000000 { -- opp-hz = /bits/ 64 <1008000000>; -- opp-microvolt = <625000 625000 1000000>, -- <675000 675000 1000000>; -- clock-latency-ns = <40000>; -- }; - opp-1200000000 { - opp-hz = /bits/ 64 <1200000000>; -- opp-microvolt = <650000 650000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - }; - opp-1416000000 { - opp-hz = /bits/ 64 <1416000000>; -- opp-microvolt = <675000 675000 1000000>, -- <675000 675000 1000000>; -+ opp-microvolt = <675000 675000 1000000>; - clock-latency-ns = <40000>; - }; - opp-1608000000 { - opp-hz = /bits/ 64 <1608000000>; -- opp-microvolt = <700000 700000 1000000>, -- <700000 700000 1000000>; -+ opp-microvolt = <700000 700000 1000000>; - clock-latency-ns = <40000>; - }; - opp-1800000000 { - opp-hz = /bits/ 64 <1800000000>; -- opp-microvolt = <775000 775000 1000000>, -- <775000 775000 1000000>; -+ opp-microvolt = <775000 775000 1000000>; - clock-latency-ns = <40000>; - }; - opp-2016000000 { - opp-hz = /bits/ 64 <2016000000>; -- opp-microvolt = <850000 850000 1000000>, -- <850000 850000 1000000>; -+ opp-microvolt = <850000 850000 1000000>; - clock-latency-ns = <40000>; - }; - opp-2208000000 { - opp-hz = /bits/ 64 <2208000000>; -- opp-microvolt = <925000 925000 1000000>, -- <925000 925000 1000000>; -+ opp-microvolt = <925000 925000 1000000>; - clock-latency-ns = <40000>; - }; - }; --- -2.42.0 - - -From 0b7abd3089fb12b9bfdba7c08ca839e2586bef50 Mon Sep 17 00:00:00 2001 -From: Sebastian Reichel -Date: Wed, 7 Feb 2024 19:01:03 +0100 -Subject: [PATCH 74/81] cpufreq: rockchip: Update for regulator coupler usage - -The mem supply will be handled via regulator coupler support. 
- -Signed-off-by: Sebastian Reichel ---- - drivers/cpufreq/rockchip-cpufreq.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/drivers/cpufreq/rockchip-cpufreq.c b/drivers/cpufreq/rockchip-cpufreq.c -index 0bf57ac85e60..9aaca8f3e782 100644 ---- a/drivers/cpufreq/rockchip-cpufreq.c -+++ b/drivers/cpufreq/rockchip-cpufreq.c -@@ -10,7 +10,7 @@ - * * handling of read margin registers - * - * Copyright (C) 2017 Fuzhou Rockchip Electronics Co., Ltd -- * Copyright (C) 2023 Collabora Ltd. -+ * Copyright (C) 2023-2024 Collabora Ltd. - */ - - #include -@@ -313,7 +313,7 @@ static int rockchip_cpufreq_cluster_init(int cpu, struct cluster_info *cluster) - int opp_table_token = -EINVAL; - struct device_node *np; - struct device *dev; -- const char * const reg_names[] = { "cpu", "mem", NULL }; -+ const char * const reg_names[] = { "cpu", NULL }; - int ret = 0; - - dev = get_cpu_device(cpu); --- -2.42.0 - - -From f0be8778fd12daebc2c63ad86e3e5ef5d9dbaa12 Mon Sep 17 00:00:00 2001 +From 8e449805b62eede77ca36179cac2ac815cae97e2 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Wed, 14 Jun 2023 15:06:37 +0530 -Subject: [PATCH 75/81] clk: rockchip: rst-rk3588: Add BIU reset +Subject: [PATCH 68/71] clk: rockchip: rst-rk3588: Add BIU reset Export hdmirx_biu soft reset id which is required by the hdmirx controller. @@ -31845,10 +30392,10 @@ index d4264db2a07f..e2fe4bd5f7f0 100644 2.42.0 -From 52ec3628d90bbd9e966b8778a108c891c78794c8 Mon Sep 17 00:00:00 2001 +From cfdba15bb795498794c833030a51612e4c7b82e0 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Wed, 20 Dec 2023 18:30:13 +0530 -Subject: [PATCH 76/81] dt-bindings: media: Document bindings for HDMI RX +Subject: [PATCH 69/71] dt-bindings: media: Document bindings for HDMI RX Controller Document bindings for the Synopsys DesignWare HDMI RX Controller. 
@@ -31998,10 +30545,10 @@ index 000000000000..a70d96b548ee 2.42.0 -From 84608d5dc9360aa5da869465936bf542f4032431 Mon Sep 17 00:00:00 2001 +From 7438a3eb9ceb1c24d82c08c9aab83249e66e79ac Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Wed, 20 Dec 2023 16:50:14 +0530 -Subject: [PATCH 77/81] arm64: dts: rockchip: Add device tree support for HDMI +Subject: [PATCH 70/71] arm64: dts: rockchip: Add device tree support for HDMI RX Controller Add device tree support for Synopsys DesignWare HDMI RX @@ -32071,10 +30618,10 @@ index 244c66faa161..e5f3d0acbd55 100644 i2c0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -index a41473075bef..73f987f44a43 100644 +index 7031360187a4..60459d8f656e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts -@@ -217,6 +217,18 @@ &hdptxphy_hdmi0 { +@@ -209,6 +209,18 @@ &hdptxphy_hdmi0 { status = "okay"; }; @@ -32093,7 +30640,7 @@ index a41473075bef..73f987f44a43 100644 &i2c0 { pinctrl-names = "default"; pinctrl-0 = <&i2c0m2_xfer>; -@@ -395,6 +407,12 @@ &pcie3x4 { +@@ -387,6 +399,12 @@ &pcie3x4 { }; &pinctrl { @@ -32178,10 +30725,10 @@ index 5984016b5f96..534c42262c73 100644 2.42.0 -From 3e8f408e023c2813973d387b4fa9da9ff5f9736d Mon Sep 17 00:00:00 2001 +From f66797f47c2edae99153674f332b2eb2749038c6 Mon Sep 17 00:00:00 2001 From: Shreeya Patel Date: Wed, 20 Dec 2023 16:52:01 +0530 -Subject: [PATCH 78/81] media: platform: synopsys: Add support for hdmi input +Subject: [PATCH 71/71] media: platform: synopsys: Add support for hdmi input driver Add initial support for the Synopsys DesignWare HDMI RX @@ -35908,166 +34455,3 @@ index 000000000000..ae43f74d471d -- 2.42.0 - -From 9571a251161f8742002b0a277a2fc5a6bfc97ca0 Mon Sep 17 00:00:00 2001 -From: K900 -Date: Wed, 3 Jan 2024 11:08:12 +0300 -Subject: [PATCH 79/81] arm64: dts: rockchip: enable cpufreq on - rk3588s-orangepi-5 - ---- - arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -index 25de4362af38..33bf7812e158 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -@@ -102,34 +102,42 @@ &combphy2_psu { - - &cpu_b0 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b1 { - cpu-supply = <&vdd_cpu_big0_s0>; -+ mem-supply = <&vdd_cpu_big0_s0>; - }; - - &cpu_b2 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_b3 { - cpu-supply = <&vdd_cpu_big1_s0>; -+ mem-supply = <&vdd_cpu_big1_s0>; - }; - - &cpu_l0 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_s0>; - }; - - &cpu_l1 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_s0>; - }; - - &cpu_l2 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_s0>; - }; - - &cpu_l3 { - cpu-supply = <&vdd_cpu_lit_s0>; -+ mem-supply = <&vdd_cpu_lit_s0>; - }; - - &gmac1 { --- -2.42.0 - - -From aac2a590cd797e6d95e4e515aa7faa02a7229d76 Mon Sep 17 00:00:00 2001 -From: K900 -Date: Fri, 12 Jan 2024 19:12:46 +0300 -Subject: [PATCH 80/81] arm64: dts: rockchip: enable HDMI out on - rk3588s-orangepi-5 - ---- - .../boot/dts/rockchip/rk3588s-orangepi-5.dts | 35 +++++++++++++++++++ - 1 file changed, 35 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts 
b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -index 33bf7812e158..1e68315d1d04 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -@@ -4,6 +4,7 @@ - - #include - #include -+#include - #include - #include - #include "rk3588s.dtsi" -@@ -673,3 +674,37 @@ &usb_host1_ohci { - &usb_host2_xhci { - status = "okay"; - }; -+ -+&display_subsystem { -+ clocks = <&hdptxphy_hdmi0>; -+ clock-names = "hdmi0_phy_pll"; -+}; -+ -+&hdmi0 { -+ status = "okay"; -+}; -+ -+&hdmi0_in { -+ hdmi0_in_vp0: endpoint { -+ remote-endpoint = <&vp0_out_hdmi0>; -+ }; -+}; -+ -+&hdptxphy_hdmi0 { -+ status = "okay"; -+}; -+ -+&vop_mmu { -+ status = "okay"; -+}; -+ -+&vop { -+ status = "okay"; -+}; -+ -+&vp0 { -+ vp0_out_hdmi0: endpoint@ROCKCHIP_VOP2_EP_HDMI0 { -+ reg = ; -+ remote-endpoint = <&hdmi0_in_vp0>; -+ }; -+}; --- -2.42.0 - - -From eadcef24731e0f1ddb86dc7c9c859387b5b029a2 Mon Sep 17 00:00:00 2001 -From: K900 -Date: Thu, 25 Jan 2024 21:44:34 +0300 -Subject: [PATCH 81/81] arm64: dts: rockchip: enable the GPU on - rk3588s-orangepi-5 - ---- - arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts | 6 ++++++ - 1 file changed, 6 insertions(+) - -diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -index 1e68315d1d04..803b8bcee556 100644 ---- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -+++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts -@@ -362,6 +362,7 @@ rk806_dvs3_null: dvs3-null-pins { - regulators { - vdd_gpu_s0: dcdc-reg1 { - regulator-name = "vdd_gpu_s0"; -+ regulator-always-on; - regulator-boot-on; - regulator-min-microvolt = <550000>; - regulator-max-microvolt = <950000>; -@@ -675,6 +676,11 @@ &usb_host2_xhci { - status = "okay"; - }; - -+&gpu { -+ mali-supply = <&vdd_gpu_s0>; -+ status = "okay"; -+}; -+ - &display_subsystem { - clocks = <&hdptxphy_hdmi0>; - clock-names = "hdmi0_phy_pll"; --- -2.42.0 -