mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:04:51 +01:00
drm/amdgpu: Reduce dequeue retry timeout for gfx9 family
Dequeue retry timeout controls the interval between checks for unmet conditions. On MI series, reduce this from 0x40 to 0x1 (~ 1 uS). The cost of additional bandwidth consumed by CP when polling memory shouldn't be substantial. Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Reviewed-by: Jonathan Kim <jonathan.kim@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
02fc2f3c46
commit
8a7820c072
10 changed files with 72 additions and 52 deletions
|
|
@ -189,7 +189,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
|
|||
.set_address_watch = kgd_gfx_aldebaran_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
|
||||
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
|
||||
.hqd_reset = kgd_gfx_v9_hqd_reset,
|
||||
|
|
|
|||
|
|
@ -415,7 +415,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
|
|||
.set_address_watch = kgd_gfx_v9_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
|
||||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
|
||||
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
|
||||
|
|
|
|||
|
|
@ -541,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
|
|||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings =
|
||||
kgd_gfx_v9_program_trap_handler_settings,
|
||||
.build_grace_period_packet_info =
|
||||
kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info =
|
||||
kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap,
|
||||
|
|
|
|||
|
|
@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
|
|||
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
|
||||
}
|
||||
|
||||
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t sch_wave,
|
||||
uint32_t que_sleep,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
/*
|
||||
* The CP cannot handle a 0 grace period input and will result in
|
||||
* an infinite grace period being set so set to 1 to prevent this.
|
||||
*/
|
||||
if (grace_period == 0)
|
||||
grace_period = 1;
|
||||
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
grace_period);
|
||||
if (sch_wave)
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
sch_wave);
|
||||
if (que_sleep)
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
QUE_SLEEP,
|
||||
que_sleep);
|
||||
|
||||
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
|
||||
}
|
||||
|
|
@ -1115,7 +1115,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
|
|||
.set_address_watch = kgd_gfx_v10_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
|
||||
.program_trap_handler_settings = program_trap_handler_settings,
|
||||
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
|
||||
.hqd_reset = kgd_gfx_v10_hqd_reset,
|
||||
|
|
|
|||
|
|
@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
|
|||
void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
|
||||
uint32_t *wait_times,
|
||||
uint32_t inst);
|
||||
void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t sch_wave,
|
||||
uint32_t que_sleep,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data);
|
||||
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
|
||||
|
|
|
|||
|
|
@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
|
|||
.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
|
||||
.program_trap_handler_settings = program_trap_handler_settings_v10_3,
|
||||
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info,
|
||||
.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
|
||||
.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
|
||||
.validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request,
|
||||
|
|
|
|||
|
|
@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev,
|
|||
adev->gfx.cu_info.max_waves_per_simd;
|
||||
}
|
||||
|
||||
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t sch_wave,
|
||||
uint32_t que_sleep,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
*reg_data = wait_times;
|
||||
|
||||
/*
|
||||
* The CP cannot handle a 0 grace period input and will result in
|
||||
* an infinite grace period being set so set to 1 to prevent this.
|
||||
*/
|
||||
if (grace_period == 0)
|
||||
grace_period = 1;
|
||||
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
grace_period);
|
||||
if (sch_wave)
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
SCH_WAVE,
|
||||
sch_wave);
|
||||
if (que_sleep)
|
||||
*reg_data = REG_SET_FIELD(*reg_data,
|
||||
CP_IQ_WAIT_TIME2,
|
||||
QUE_SLEEP,
|
||||
que_sleep);
|
||||
|
||||
*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
|
||||
}
|
||||
|
|
@ -1255,7 +1255,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = {
|
|||
.set_address_watch = kgd_gfx_v9_set_address_watch,
|
||||
.clear_address_watch = kgd_gfx_v9_clear_address_watch,
|
||||
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
|
||||
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
|
||||
.build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info,
|
||||
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
|
||||
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
|
||||
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
|
||||
|
|
|
|||
|
|
@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev,
|
|||
void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
|
||||
uint32_t *wait_times,
|
||||
uint32_t inst);
|
||||
void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
|
||||
void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t sch_wave,
|
||||
uint32_t que_sleep,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data);
|
||||
uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev,
|
||||
|
|
|
|||
|
|
@ -298,13 +298,14 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
|
|||
}
|
||||
|
||||
static inline void pm_build_dequeue_wait_counts_packet_info(struct packet_manager *pm,
|
||||
uint32_t sch_value, uint32_t *reg_offset,
|
||||
uint32_t sch_value, uint32_t que_sleep, uint32_t *reg_offset,
|
||||
uint32_t *reg_data)
|
||||
{
|
||||
pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
|
||||
pm->dqm->dev->kfd2kgd->build_dequeue_wait_counts_packet_info(
|
||||
pm->dqm->dev->adev,
|
||||
pm->dqm->wait_times,
|
||||
sch_value,
|
||||
que_sleep,
|
||||
reg_offset,
|
||||
reg_data);
|
||||
}
|
||||
|
|
@ -319,27 +320,43 @@ static int pm_config_dequeue_wait_counts_v9(struct packet_manager *pm,
|
|||
uint32_t reg_data = 0;
|
||||
|
||||
switch (cmd) {
|
||||
case KFD_DEQUEUE_WAIT_INIT:
|
||||
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
|
||||
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
|
||||
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, 1, &reg_offset, &reg_data);
|
||||
else
|
||||
case KFD_DEQUEUE_WAIT_INIT: {
|
||||
uint32_t sch_wave = 0, que_sleep = 0;
|
||||
/* Reduce CP_IQ_WAIT_TIME2.QUE_SLEEP to 0x1 from default 0x40.
|
||||
* On a 1GHz machine this is roughly 1 microsecond, which is
|
||||
* about how long it takes to load data out of memory during
|
||||
* queue connect
|
||||
* QUE_SLEEP: Wait Count for Dequeue Retry.
|
||||
*/
|
||||
if (KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(9, 4, 1) &&
|
||||
KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(10, 0, 0)) {
|
||||
que_sleep = 1;
|
||||
|
||||
/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
|
||||
if (amdgpu_emu_mode == 0 && pm->dqm->dev->adev->gmc.is_app_apu &&
|
||||
(KFD_GC_VERSION(pm->dqm->dev) == IP_VERSION(9, 4, 3)))
|
||||
sch_wave = 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, sch_wave, que_sleep,
|
||||
&reg_offset, &reg_data);
|
||||
|
||||
break;
|
||||
}
|
||||
case KFD_DEQUEUE_WAIT_RESET:
|
||||
/* function called only to get reg_offset */
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, 0, &reg_offset, &reg_data);
|
||||
reg_data = pm->dqm->wait_times;
|
||||
/* reg_data would be set to dqm->wait_times */
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, 0, 0, &reg_offset, &reg_data);
|
||||
break;
|
||||
|
||||
case KFD_DEQUEUE_WAIT_SET_SCH_WAVE:
|
||||
/* The CP cannot handle value 0 and it will result in
|
||||
* an infinite grace period being set so set to 1 to prevent this.
|
||||
* an infinite grace period being set so set to 1 to prevent this. Also
|
||||
* avoid debugger API breakage as it sets 0 and expects a low value.
|
||||
*/
|
||||
if (!value)
|
||||
value = 1;
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, value, &reg_offset, &reg_data);
|
||||
pm_build_dequeue_wait_counts_packet_info(pm, value, 0, &reg_offset, &reg_data);
|
||||
break;
|
||||
default:
|
||||
pr_err("Invalid dequeue wait cmd\n");
|
||||
|
|
|
|||
|
|
@ -313,9 +313,10 @@ struct kfd2kgd_calls {
|
|||
void (*get_iq_wait_times)(struct amdgpu_device *adev,
|
||||
uint32_t *wait_times,
|
||||
uint32_t inst);
|
||||
void (*build_grace_period_packet_info)(struct amdgpu_device *adev,
|
||||
void (*build_dequeue_wait_counts_packet_info)(struct amdgpu_device *adev,
|
||||
uint32_t wait_times,
|
||||
uint32_t grace_period,
|
||||
uint32_t sch_wave,
|
||||
uint32_t que_sleep,
|
||||
uint32_t *reg_offset,
|
||||
uint32_t *reg_data);
|
||||
void (*get_cu_occupancy)(struct amdgpu_device *adev,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue