linux/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.h
Sunil Khatri 65b5c326ce drm/amdgpu/userq: refcount userqueues to avoid any race conditions
To avoid race conditions and use-after-free (UAF) cases, implement
kref-based queues and protect the operations below with the xa lock:
a. Getting a queue from the xarray
b. Incrementing/decrementing its refcount

Every time someone wants to access a queue, it must be looked up via
amdgpu_userq_get() so that the proper locks are held and the object is
only returned while it is still active.

A user queue is destroyed when the last refcount is dropped, which
typically happens via the free IOCTL or during fini.
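
For illustration, the expected access pattern looks roughly like this
(the uq_mgr/qid variables and the error handling are only an example,
not code from this patch):

	struct amdgpu_usermode_queue *queue;

	queue = amdgpu_userq_get(uq_mgr, qid);
	if (!queue)
		return -EINVAL; /* already freed or never existed */

	/* ... operate on the queue while the reference is held ... */

	amdgpu_userq_put(queue);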

v2: Add the missing refcount drop in one of the conditions in the signal ioctl [Alex]

v3: Remove the queue from the xarray first in the free queue ioctl path
    [Christian]

- Pass the queue to amdgpu_userq_put() directly.
- Make amdgpu_userq_put() xa_lock free, since a put is only done for each
  get and the final put happens via destroy, where the queue is removed
  from the xarray under the lock (see the sketch after this list).
- Use amdgpu_userq_put() in fini too so cleanup is done fully.
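
A minimal sketch of that get/put scheme (assuming the declarations from
amdgpu_userq.h plus <linux/kref.h> and <linux/xarray.h>; the release
callback name and its body are made up for illustration and are not the
literal patch contents):

struct amdgpu_usermode_queue *
amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
{
	struct amdgpu_usermode_queue *queue;

	/* Look up the queue and take a reference under the xa lock. */
	xa_lock(&uq_mgr->userq_xa);
	queue = xa_load(&uq_mgr->userq_xa, qid);
	if (queue && !kref_get_unless_zero(&queue->refcount))
		queue = NULL; /* queue is already being destroyed */
	xa_unlock(&uq_mgr->userq_xa);

	return queue;
}

/* Hypothetical release callback; the real teardown is more involved. */
static void amdgpu_userq_release(struct kref *ref)
{
	struct amdgpu_usermode_queue *queue =
		container_of(ref, struct amdgpu_usermode_queue, refcount);

	kfree(queue);
}

void amdgpu_userq_put(struct amdgpu_usermode_queue *queue)
{
	/*
	 * No xa lock needed here: puts only pair with gets, and the
	 * final put comes from the destroy/fini paths after the queue
	 * has already been removed from the xarray under the lock.
	 */
	kref_put(&queue->refcount, amdgpu_userq_release);
}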

v4: Use xa_erase() directly rather than doing a load followed by an erase
    in the free ioctl. Also remove some of the error logs that a user
    could exploit to flood the kernel log [Christian]
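
The v4 change can be pictured roughly like this (hypothetical helper,
only to show the direct xa_erase() usage; the real free path also
unmaps the queue and tears down its resources):

static int amdgpu_userq_free_example(struct amdgpu_userq_mgr *uq_mgr, u32 qid)
{
	struct amdgpu_usermode_queue *queue;

	/* xa_erase() removes the entry and takes the xa lock internally. */
	queue = xa_erase(&uq_mgr->userq_xa, qid);
	if (!queue)
		return -EINVAL;

	/* Drop the base reference; the release callback frees the queue. */
	amdgpu_userq_put(queue);
	return 0;
}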

Signed-off-by: Sunil Khatri <sunil.khatri@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 4952189b284d4d847f92636bb42dd747747129c0)
Cc: <stable@vger.kernel.org> # 048c1c4e51: drm/amdgpu/userq: Consolidate wait ioctl exit path
Cc: <stable@vger.kernel.org>
2026-03-04 13:15:00 -05:00


/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#ifndef AMDGPU_USERQ_H_
#define AMDGPU_USERQ_H_
#include "amdgpu_eviction_fence.h"

#define AMDGPU_MAX_USERQ_COUNT 512

#define to_ev_fence(f) container_of(f, struct amdgpu_eviction_fence, base)
#define uq_mgr_to_fpriv(u) container_of(u, struct amdgpu_fpriv, userq_mgr)
#define work_to_uq_mgr(w, name) container_of(w, struct amdgpu_userq_mgr, name)

enum amdgpu_userq_state {
	AMDGPU_USERQ_STATE_UNMAPPED = 0,
	AMDGPU_USERQ_STATE_MAPPED,
	AMDGPU_USERQ_STATE_PREEMPTED,
	AMDGPU_USERQ_STATE_HUNG,
	AMDGPU_USERQ_STATE_INVALID_VA,
};
struct amdgpu_mqd_prop;

struct amdgpu_userq_obj {
	void *cpu_ptr;
	uint64_t gpu_addr;
	struct amdgpu_bo *obj;
};

struct amdgpu_userq_va_cursor {
	u64 gpu_addr;
	struct list_head list;
};

struct amdgpu_usermode_queue {
	int queue_type;
	enum amdgpu_userq_state state;
	uint64_t doorbell_handle;
	uint64_t doorbell_index;
	uint64_t flags;
	struct amdgpu_mqd_prop *userq_prop;
	struct amdgpu_userq_mgr *userq_mgr;
	struct amdgpu_vm *vm;
	struct amdgpu_userq_obj mqd;
	struct amdgpu_userq_obj db_obj;
	struct amdgpu_userq_obj fw_obj;
	struct amdgpu_userq_obj wptr_obj;
	struct xarray fence_drv_xa;
	struct amdgpu_userq_fence_driver *fence_drv;
	struct dma_fence *last_fence;
	u32 xcp_id;
	int priority;
	struct dentry *debugfs_queue;
	struct delayed_work hang_detect_work;
	struct dma_fence *hang_detect_fence;
	struct kref refcount;
	struct list_head userq_va_list;
};
struct amdgpu_userq_funcs {
	int (*mqd_create)(struct amdgpu_usermode_queue *queue,
			  struct drm_amdgpu_userq_in *args);
	void (*mqd_destroy)(struct amdgpu_usermode_queue *uq);
	int (*unmap)(struct amdgpu_usermode_queue *queue);
	int (*map)(struct amdgpu_usermode_queue *queue);
	int (*preempt)(struct amdgpu_usermode_queue *queue);
	int (*restore)(struct amdgpu_usermode_queue *queue);
	int (*detect_and_reset)(struct amdgpu_device *adev,
				int queue_type);
};
/* Usermode queues for gfx */
struct amdgpu_userq_mgr {
	/**
	 * @userq_xa: Per-process user queue map (queue ID → queue)
	 * Key: queue_id (unique ID within the process's userq manager)
	 * Value: struct amdgpu_usermode_queue
	 */
	struct xarray userq_xa;
	struct mutex userq_mutex;
	struct amdgpu_device *adev;
	struct delayed_work resume_work;
	struct drm_file *file;
	atomic_t userq_count[AMDGPU_RING_TYPE_MAX];
};

struct amdgpu_db_info {
	uint64_t doorbell_handle;
	uint32_t queue_type;
	uint32_t doorbell_offset;
	struct amdgpu_userq_obj *db_obj;
};
struct amdgpu_usermode_queue *amdgpu_userq_get(struct amdgpu_userq_mgr *uq_mgr, u32 qid);
void amdgpu_userq_put(struct amdgpu_usermode_queue *queue);

int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);

int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct drm_file *file_priv,
			  struct amdgpu_device *adev);
void amdgpu_userq_mgr_fini(struct amdgpu_userq_mgr *userq_mgr);

int amdgpu_userq_create_object(struct amdgpu_userq_mgr *uq_mgr,
			       struct amdgpu_userq_obj *userq_obj,
			       int size);
void amdgpu_userq_destroy_object(struct amdgpu_userq_mgr *uq_mgr,
				 struct amdgpu_userq_obj *userq_obj);

void amdgpu_userq_evict(struct amdgpu_userq_mgr *uq_mgr,
			struct amdgpu_eviction_fence *ev_fence);
void amdgpu_userq_ensure_ev_fence(struct amdgpu_userq_mgr *userq_mgr,
				  struct amdgpu_eviction_fence_mgr *evf_mgr);

uint64_t amdgpu_userq_get_doorbell_index(struct amdgpu_userq_mgr *uq_mgr,
					 struct amdgpu_db_info *db_info,
					 struct drm_file *filp);

u32 amdgpu_userq_get_supported_ip_mask(struct amdgpu_device *adev);
bool amdgpu_userq_enabled(struct drm_device *dev);

int amdgpu_userq_suspend(struct amdgpu_device *adev);
int amdgpu_userq_resume(struct amdgpu_device *adev);

int amdgpu_userq_stop_sched_for_enforce_isolation(struct amdgpu_device *adev,
						  u32 idx);
int amdgpu_userq_start_sched_for_enforce_isolation(struct amdgpu_device *adev,
						   u32 idx);

void amdgpu_userq_reset_work(struct work_struct *work);
void amdgpu_userq_pre_reset(struct amdgpu_device *adev);
int amdgpu_userq_post_reset(struct amdgpu_device *adev, bool vram_lost);
void amdgpu_userq_start_hang_detect_work(struct amdgpu_usermode_queue *queue);

int amdgpu_userq_input_va_validate(struct amdgpu_device *adev,
				   struct amdgpu_usermode_queue *queue,
				   u64 addr, u64 expected_size);
int amdgpu_userq_gem_va_unmap_validate(struct amdgpu_device *adev,
				       struct amdgpu_bo_va_mapping *mapping,
				       uint64_t saddr);
#endif