mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 01:04:41 +01:00
drm/panthor: Add flag to map GEM object Write-Back Cacheable
Will be used by the UMD to optimize CPU accesses to buffers that are frequently read by the CPU, or on which the access pattern makes non-cacheable mappings inefficient. Mapping buffers CPU-cached implies taking care of the CPU cache maintenance in the UMD, unless the GPU is IO coherent. v2: - Add more to the commit message - Tweak the doc - Make sure we sync the section of the BO pointing to the CS syncobj before we read its seqno v3: - Fix formatting/spelling issues v4: - Add Steve's R-b v5: - Drop Steve's R-b (changes in the ioctl semantics requiring new review) v6: - Fix the uAPI doc - Fix inverted logic in some comment v7: - No changes v8: - Collect R-b Signed-off-by: Loïc Molinari <loic.molinari@collabora.com> Reviewed-by: Steven Price <steven.price@arm.com> Link: https://patch.msgid.link/20251208100841.730527-7-boris.brezillon@collabora.com Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
This commit is contained in:
parent
c146c82f86
commit
cd2c9c3015
4 changed files with 66 additions and 5 deletions
|
|
@ -902,7 +902,8 @@ static int panthor_ioctl_vm_destroy(struct drm_device *ddev, void *data,
|
|||
return panthor_vm_pool_destroy_vm(pfile->vms, args->id);
|
||||
}
|
||||
|
||||
#define PANTHOR_BO_FLAGS DRM_PANTHOR_BO_NO_MMAP
|
||||
#define PANTHOR_BO_FLAGS (DRM_PANTHOR_BO_NO_MMAP | \
|
||||
DRM_PANTHOR_BO_WB_MMAP)
|
||||
|
||||
static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
|
||||
struct drm_file *file)
|
||||
|
|
@ -921,6 +922,10 @@ static int panthor_ioctl_bo_create(struct drm_device *ddev, void *data,
|
|||
goto out_dev_exit;
|
||||
}
|
||||
|
||||
if ((args->flags & DRM_PANTHOR_BO_NO_MMAP) &&
|
||||
(args->flags & DRM_PANTHOR_BO_WB_MMAP))
|
||||
return -EINVAL;
|
||||
|
||||
if (args->exclusive_vm_id) {
|
||||
vm = panthor_vm_pool_get_vm(pfile->vms, args->exclusive_vm_id);
|
||||
if (!vm) {
|
||||
|
|
|
|||
|
|
@ -77,6 +77,39 @@ static void panthor_gem_debugfs_set_usage_flags(struct panthor_gem_object *bo, u
|
|||
static void panthor_gem_debugfs_bo_init(struct panthor_gem_object *bo) {}
|
||||
#endif
|
||||
|
||||
static bool
|
||||
should_map_wc(struct panthor_gem_object *bo, struct panthor_vm *exclusive_vm)
|
||||
{
|
||||
struct panthor_device *ptdev = container_of(bo->base.base.dev, struct panthor_device, base);
|
||||
|
||||
/* We can't do uncached mappings if the device is coherent,
|
||||
* because the zeroing done by the shmem layer at page allocation
|
||||
* time happens on a cached mapping which isn't CPU-flushed (at least
|
||||
* not on Arm64 where the flush is deferred to PTE setup time, and
|
||||
* only done conditionally based on the mapping permissions). We can't
|
||||
* rely on dma_map_sgtable()/dma_sync_sgtable_for_xxx() either to flush
|
||||
* those, because they are NOPed if dma_dev_coherent() returns true.
|
||||
*
|
||||
* FIXME: Note that this problem is going to pop up again when we
|
||||
* decide to support mapping buffers with the NO_MMAP flag as
|
||||
* non-shareable (AKA buffers accessed only by the GPU), because we
|
||||
* need the same CPU flush to happen after page allocation, otherwise
|
||||
* there's a risk of data leak or late corruption caused by a dirty
|
||||
* cacheline being evicted. At this point we'll need a way to force
|
||||
* CPU cache maintenance regardless of whether the device is coherent
|
||||
* or not.
|
||||
*/
|
||||
if (ptdev->coherent)
|
||||
return false;
|
||||
|
||||
/* Cached mappings are explicitly requested, so no write-combine. */
|
||||
if (bo->flags & DRM_PANTHOR_BO_WB_MMAP)
|
||||
return false;
|
||||
|
||||
/* The default is write-combine. */
|
||||
return true;
|
||||
}
|
||||
|
||||
static void panthor_gem_free_object(struct drm_gem_object *obj)
|
||||
{
|
||||
struct panthor_gem_object *bo = to_panthor_bo(obj);
|
||||
|
|
@ -163,6 +196,7 @@ panthor_kernel_bo_create(struct panthor_device *ptdev, struct panthor_vm *vm,
|
|||
bo = to_panthor_bo(&obj->base);
|
||||
kbo->obj = &obj->base;
|
||||
bo->flags = bo_flags;
|
||||
bo->base.map_wc = should_map_wc(bo, vm);
|
||||
bo->exclusive_vm_root_gem = panthor_vm_root_gem(vm);
|
||||
drm_gem_object_get(bo->exclusive_vm_root_gem);
|
||||
bo->base.base.resv = bo->exclusive_vm_root_gem->resv;
|
||||
|
|
@ -363,7 +397,6 @@ static const struct drm_gem_object_funcs panthor_gem_funcs = {
|
|||
*/
|
||||
struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t size)
|
||||
{
|
||||
struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base);
|
||||
struct panthor_gem_object *obj;
|
||||
|
||||
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
|
||||
|
|
@ -371,7 +404,6 @@ struct drm_gem_object *panthor_gem_create_object(struct drm_device *ddev, size_t
|
|||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
obj->base.base.funcs = &panthor_gem_funcs;
|
||||
obj->base.map_wc = !ptdev->coherent;
|
||||
mutex_init(&obj->label.lock);
|
||||
|
||||
panthor_gem_debugfs_bo_init(obj);
|
||||
|
|
@ -406,6 +438,7 @@ panthor_gem_create_with_handle(struct drm_file *file,
|
|||
|
||||
bo = to_panthor_bo(&shmem->base);
|
||||
bo->flags = flags;
|
||||
bo->base.map_wc = should_map_wc(bo, exclusive_vm);
|
||||
|
||||
if (exclusive_vm) {
|
||||
bo->exclusive_vm_root_gem = panthor_vm_root_gem(exclusive_vm);
|
||||
|
|
|
|||
|
|
@ -863,8 +863,11 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
|
|||
struct iosys_map map;
|
||||
int ret;
|
||||
|
||||
if (queue->syncwait.kmap)
|
||||
return queue->syncwait.kmap + queue->syncwait.offset;
|
||||
if (queue->syncwait.kmap) {
|
||||
bo = container_of(queue->syncwait.obj,
|
||||
struct panthor_gem_object, base.base);
|
||||
goto out_sync;
|
||||
}
|
||||
|
||||
bo = panthor_vm_get_bo_for_va(group->vm,
|
||||
queue->syncwait.gpu_va,
|
||||
|
|
@ -881,6 +884,17 @@ panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue
|
|||
if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap))
|
||||
goto err_put_syncwait_obj;
|
||||
|
||||
out_sync:
|
||||
/* Make sure the CPU caches are invalidated before the seqno is read.
|
||||
* drm_gem_shmem_sync() is a NOP if map_wc=true, so no need to check
|
||||
* it here.
|
||||
*/
|
||||
panthor_gem_sync(&bo->base.base, queue->syncwait.offset,
|
||||
queue->syncwait.sync64 ?
|
||||
sizeof(struct panthor_syncobj_64b) :
|
||||
sizeof(struct panthor_syncobj_32b),
|
||||
DRM_PANTHOR_BO_SYNC_CPU_CACHE_FLUSH_AND_INVALIDATE);
|
||||
|
||||
return queue->syncwait.kmap + queue->syncwait.offset;
|
||||
|
||||
err_put_syncwait_obj:
|
||||
|
|
|
|||
|
|
@ -681,6 +681,15 @@ struct drm_panthor_vm_get_state {
|
|||
enum drm_panthor_bo_flags {
	/** @DRM_PANTHOR_BO_NO_MMAP: The buffer object will never be CPU-mapped in userspace. */
	DRM_PANTHOR_BO_NO_MMAP = (1 << 0),

	/**
	 * @DRM_PANTHOR_BO_WB_MMAP: Force "Write-Back Cacheable" CPU mapping.
	 *
	 * CPU map the buffer object in userspace by forcing the "Write-Back
	 * Cacheable" cacheability attribute. The mapping otherwise uses the
	 * "Non-Cacheable" attribute if the GPU is not IO coherent.
	 *
	 * Mutually exclusive with @DRM_PANTHOR_BO_NO_MMAP: the BO_CREATE
	 * ioctl rejects the combination with -EINVAL, since a never-mapped
	 * buffer cannot have a CPU cacheability preference.
	 */
	DRM_PANTHOR_BO_WB_MMAP = (1 << 1),
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue