drm/amdgpu: Modify the count method of defer error

The number of newly added de counts and the number of
newly added error addresses remain consistent

Signed-off-by: Ce Sun <cesun102@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
Ce Sun 2025-05-06 20:17:33 +08:00 committed by Alex Deucher
parent 085c997d44
commit 533aa8bdbe
3 changed files with 9 additions and 2 deletions

View file

@ -529,6 +529,7 @@ int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
pfns[i] = err_data.err_addr[i].retired_page;
}
ret = i;
adev->umc.err_addr_cnt = err_data.err_addr_cnt;
out:
kfree(err_data.err_addr);

View file

@ -145,6 +145,8 @@ struct amdgpu_umc {
unsigned long active_mask;
struct amdgpu_umc_flip_bits flip_bits;
unsigned long err_addr_cnt;
};
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);

View file

@ -482,8 +482,12 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank
bank->regs[ACA_REG_IDX_ADDR]);
ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
count = ext_error_code == 0 ?
ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
if (umc_v12_0_is_deferred_error(adev, status))
count = ext_error_code == 0 ?
adev->umc.err_addr_cnt / adev->umc.retire_unit : 1ULL;
else
count = ext_error_code == 0 ?
ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
return aca_error_cache_log_bank_error(handle, &info, err_type, count);
}