drm/amdgpu: introduce amdgpu_ttm_buffer_entity

No functional change for now, but this struct will have more
fields added in the next commit.

This change would introduce synchronisation issues, because
dependencies between successive jobs are not yet taken care of
properly. For instance, amdgpu_ttm_clear_buffer calls
amdgpu_ttm_map_buffer and then amdgpu_ttm_fill_mem, which should
use different entities (default_entity, then move/clear entity).
To prevent failures in this commit, we limit ourselves to
2 entities: default_entity (which replaces high_pr usages) and
clear_entity (which replaces low_pr usages).

The next commits will deal with these dependencies correctly,
and then we'll be able to use move_entity.

---
v2: renamed amdgpu_ttm_buffer_entity
v4: don't use move_entity in ttm yet
---

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com> (v3)
Acked-by: Felix Kuehling <felix.kuehling@amd.com> (v3)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Pierre-Eric Pelloux-Prayer 2025-09-19 09:35:03 +02:00 committed by Alex Deucher
parent a0f8297068
commit edf47fb3d1
4 changed files with 39 additions and 18 deletions

View file

@ -738,7 +738,7 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* itself at least for GART.
*/
mutex_lock(&adev->mman.gtt_window_lock);
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job, AMDGPU_KERNEL_JOB_ID_FLUSH_GPU_TLB);

View file

@ -223,7 +223,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED, &job,
@ -1558,7 +1558,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
memcpy(adev->mman.sdma_access_ptr, buf, len);
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.default_entity.base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, AMDGPU_IB_POOL_DELAYED,
&job,
@ -2277,7 +2277,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
ring = adev->mman.buffer_funcs_ring;
sched = &ring->sched;
r = drm_sched_entity_init(&adev->mman.high_pr,
r = drm_sched_entity_init(&adev->mman.default_entity.base,
DRM_SCHED_PRIORITY_KERNEL, &sched,
1, NULL);
if (r) {
@ -2287,18 +2287,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
return;
}
r = drm_sched_entity_init(&adev->mman.low_pr,
r = drm_sched_entity_init(&adev->mman.clear_entity.base,
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
dev_err(adev->dev,
"Failed setting up TTM BO clear entity (%d)\n",
r);
goto error_free_entity;
}
r = drm_sched_entity_init(&adev->mman.move_entity.base,
DRM_SCHED_PRIORITY_NORMAL, &sched,
1, NULL);
if (r) {
dev_err(adev->dev,
"Failed setting up TTM BO move entity (%d)\n",
r);
drm_sched_entity_destroy(&adev->mman.clear_entity.base);
goto error_free_entity;
}
} else {
drm_sched_entity_destroy(&adev->mman.high_pr);
drm_sched_entity_destroy(&adev->mman.low_pr);
drm_sched_entity_destroy(&adev->mman.default_entity.base);
drm_sched_entity_destroy(&adev->mman.clear_entity.base);
drm_sched_entity_destroy(&adev->mman.move_entity.base);
/* Drop all the old fences since re-creating the scheduler entities
* will allocate new contexts.
*/
@ -2316,7 +2328,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
return;
error_free_entity:
drm_sched_entity_destroy(&adev->mman.high_pr);
drm_sched_entity_destroy(&adev->mman.default_entity.base);
}
static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
@ -2328,8 +2340,8 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
{
enum amdgpu_ib_pool_type pool = AMDGPU_IB_POOL_DELAYED;
int r;
struct drm_sched_entity *entity = delayed ? &adev->mman.low_pr :
&adev->mman.high_pr;
struct drm_sched_entity *entity = delayed ? &adev->mman.clear_entity.base :
&adev->mman.default_entity.base;
r = amdgpu_job_alloc_with_ib(adev, entity,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4, pool, job, k_job_id);

View file

@ -52,6 +52,10 @@ struct amdgpu_gtt_mgr {
spinlock_t lock;
};
/*
 * Wrapper around a DRM scheduler entity. struct amdgpu_mman embeds one
 * instance per use (default_entity, clear_entity, move_entity). For now the
 * wrapper carries nothing besides the embedded entity.
 */
struct amdgpu_ttm_buffer_entity {
/* Underlying scheduler entity; this is what gets handed to
 * drm_sched_entity_init() and amdgpu_job_alloc_with_ib().
 */
struct drm_sched_entity base;
};
struct amdgpu_mman {
struct ttm_device bdev;
struct ttm_pool *ttm_pools;
@ -64,10 +68,10 @@ struct amdgpu_mman {
bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
/* High priority scheduler entity for buffer moves */
struct drm_sched_entity high_pr;
/* Low priority scheduler entity for VRAM clearing */
struct drm_sched_entity low_pr;
struct amdgpu_ttm_buffer_entity default_entity;
struct amdgpu_ttm_buffer_entity clear_entity;
struct amdgpu_ttm_buffer_entity move_entity;
struct amdgpu_vram_mgr vram_mgr;
struct amdgpu_gtt_mgr gtt_mgr;

View file

@ -45,7 +45,9 @@ svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, u64 addr)
}
static int
svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
svm_migrate_gart_map(struct amdgpu_ring *ring,
struct amdgpu_ttm_buffer_entity *entity,
u64 npages,
dma_addr_t *addr, u64 *gart_addr, u64 flags)
{
struct amdgpu_device *adev = ring->adev;
@ -63,7 +65,7 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, u64 npages,
num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
num_bytes = npages * 8;
r = amdgpu_job_alloc_with_ib(adev, &adev->mman.high_pr,
r = amdgpu_job_alloc_with_ib(adev, &entity->base,
AMDGPU_FENCE_OWNER_UNDEFINED,
num_dw * 4 + num_bytes,
AMDGPU_IB_POOL_DELAYED,
@ -128,11 +130,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
{
const u64 GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_ttm_buffer_entity *entity;
u64 gart_s, gart_d;
struct dma_fence *next;
u64 size;
int r;
entity = &adev->mman.default_entity;
mutex_lock(&adev->mman.gtt_window_lock);
while (npages) {
@ -140,10 +145,10 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
if (direction == FROM_VRAM_TO_RAM) {
gart_s = svm_migrate_direct_mapping_addr(adev, *vram);
r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0);
r = svm_migrate_gart_map(ring, entity, size, sys, &gart_d, 0);
} else if (direction == FROM_RAM_TO_VRAM) {
r = svm_migrate_gart_map(ring, size, sys, &gart_s,
r = svm_migrate_gart_map(ring, entity, size, sys, &gart_s,
KFD_IOCTL_SVM_FLAG_GPU_RO);
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}