Skip to content

Commit b97dfa2

Browse files
shaoyunlogabbay
authored and committed
drm/amdgpu: save vm fault information for amdkfd
amdgpu saves the VM fault related information for KFD usage and keeps the copy until KFD reads it. Signed-off-by: shaoyun liu <[email protected]> Signed-off-by: Felix Kuehling <[email protected]> Acked-by: Christian König <[email protected]> Signed-off-by: Oded Gabbay <[email protected]>
1 parent 101fee6 commit b97dfa2

File tree

8 files changed

+105
-2
lines changed

8 files changed

+105
-2
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
183183
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
184184
struct dma_fence **ef);
185185

186+
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
187+
struct kfd_vm_fault_info *info);
188+
186189
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
187190
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
188191

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
216216
.invalidate_tlbs = invalidate_tlbs,
217217
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
218218
.submit_ib = amdgpu_amdkfd_submit_ib,
219+
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
219220
};
220221

221222
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
176176
.invalidate_tlbs = invalidate_tlbs,
177177
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
178178
.submit_ib = amdgpu_amdkfd_submit_ib,
179+
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
179180
};
180181

181182
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,6 +1621,20 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
16211621
return ret;
16221622
}
16231623

1624+
/*
 * amdgpu_amdkfd_gpuvm_get_vm_fault_info - hand the saved VM fault record to KFD
 *
 * @kgd: device handle; cast directly to struct amdgpu_device
 * @mem: caller-provided destination for the fault record (the prototype in
 *       amdgpu_amdkfd.h names this parameter "info")
 *
 * If adev->gmc.vm_fault_info_updated reads 1, the record stored in
 * adev->gmc.vm_fault_info is copied into *mem and the flag is reset to 0.
 * If the flag is not 1, *mem is left untouched, so the caller cannot tell
 * from the return value whether a record was delivered.
 *
 * Return: always 0.
 */
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
1625+
struct kfd_vm_fault_info *mem)
1626+
{
1627+
struct amdgpu_device *adev;
1628+
1629+
adev = (struct amdgpu_device *)kgd;
1630+
/* Flag value 1 means a fault record is pending and has not been read yet. */
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
1631+
*mem = *adev->gmc.vm_fault_info;
1632+
/*
 * Order the copy above before the flag clear below. The gmc_v7_0 and
 * gmc_v8_0 interrupt handlers only write a new record while the flag
 * is 0, so the copy presumably must finish before they may overwrite it.
 */
mb();
1633+
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1634+
}
1635+
return 0;
1636+
}
1637+
16241638
/* Evict a userptr BO by stopping the queues if necessary
16251639
*
16261640
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it

drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
105105
/* protects concurrent invalidation */
106106
spinlock_t invalidate_lock;
107107
bool translate_further;
108+
struct kfd_vm_fault_info *vm_fault_info;
109+
atomic_t vm_fault_info_updated;
108110

109111
const struct amdgpu_gmc_funcs *gmc_funcs;
110112
};

drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "cik.h"
2929
#include "gmc_v7_0.h"
3030
#include "amdgpu_ucode.h"
31+
#include "amdgpu_amdkfd.h"
3132

3233
#include "bif/bif_4_1_d.h"
3334
#include "bif/bif_4_1_sh_mask.h"
@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
10781079
adev->vm_manager.vram_base_offset = 0;
10791080
}
10801081

1082+
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
1083+
GFP_KERNEL);
1084+
if (!adev->gmc.vm_fault_info)
1085+
return -ENOMEM;
1086+
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1087+
10811088
return 0;
10821089
}
10831090

@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
10871094

10881095
amdgpu_gem_force_release(adev);
10891096
amdgpu_vm_manager_fini(adev);
1097+
kfree(adev->gmc.vm_fault_info);
10901098
gmc_v7_0_gart_fini(adev);
10911099
amdgpu_bo_fini(adev);
10921100
release_firmware(adev->gmc.fw);
@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
12761284
struct amdgpu_irq_src *source,
12771285
struct amdgpu_iv_entry *entry)
12781286
{
1279-
u32 addr, status, mc_client;
1287+
u32 addr, status, mc_client, vmid;
12801288

12811289
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
12821290
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
13011309
entry->pasid);
13021310
}
13031311

1312+
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1313+
VMID);
1314+
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
1315+
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
1316+
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
1317+
u32 protections = REG_GET_FIELD(status,
1318+
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1319+
PROTECTIONS);
1320+
1321+
info->vmid = vmid;
1322+
info->mc_id = REG_GET_FIELD(status,
1323+
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1324+
MEMORY_CLIENT_ID);
1325+
info->status = status;
1326+
info->page_addr = addr;
1327+
info->prot_valid = protections & 0x7 ? true : false;
1328+
info->prot_read = protections & 0x8 ? true : false;
1329+
info->prot_write = protections & 0x10 ? true : false;
1330+
info->prot_exec = protections & 0x20 ? true : false;
1331+
mb();
1332+
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
1333+
}
1334+
13041335
return 0;
13051336
}
13061337

drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "amdgpu.h"
2727
#include "gmc_v8_0.h"
2828
#include "amdgpu_ucode.h"
29+
#include "amdgpu_amdkfd.h"
2930

3031
#include "gmc/gmc_8_1_d.h"
3132
#include "gmc/gmc_8_1_sh_mask.h"
@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
11821183
adev->vm_manager.vram_base_offset = 0;
11831184
}
11841185

1186+
adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
1187+
GFP_KERNEL);
1188+
if (!adev->gmc.vm_fault_info)
1189+
return -ENOMEM;
1190+
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1191+
11851192
return 0;
11861193
}
11871194

@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
11911198

11921199
amdgpu_gem_force_release(adev);
11931200
amdgpu_vm_manager_fini(adev);
1201+
kfree(adev->gmc.vm_fault_info);
11941202
gmc_v8_0_gart_fini(adev);
11951203
amdgpu_bo_fini(adev);
11961204
release_firmware(adev->gmc.fw);
@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
14261434
struct amdgpu_irq_src *source,
14271435
struct amdgpu_iv_entry *entry)
14281436
{
1429-
u32 addr, status, mc_client;
1437+
u32 addr, status, mc_client, vmid;
14301438

14311439
if (amdgpu_sriov_vf(adev)) {
14321440
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
14631471
entry->pasid);
14641472
}
14651473

1474+
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1475+
VMID);
1476+
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
1477+
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
1478+
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
1479+
u32 protections = REG_GET_FIELD(status,
1480+
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1481+
PROTECTIONS);
1482+
1483+
info->vmid = vmid;
1484+
info->mc_id = REG_GET_FIELD(status,
1485+
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
1486+
MEMORY_CLIENT_ID);
1487+
info->status = status;
1488+
info->page_addr = addr;
1489+
info->prot_valid = protections & 0x7 ? true : false;
1490+
info->prot_read = protections & 0x8 ? true : false;
1491+
info->prot_write = protections & 0x10 ? true : false;
1492+
info->prot_exec = protections & 0x20 ? true : false;
1493+
mb();
1494+
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
1495+
}
1496+
14661497
return 0;
14671498
}
14681499

drivers/gpu/drm/amd/include/kgd_kfd_interface.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ enum kfd_preempt_type {
4747
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
4848
};
4949

50+
/*
 * Snapshot of one VM protection fault, filled in by the gmc_v7_0/gmc_v8_0
 * interrupt handlers for faults on a KFD VMID and copied out to amdkfd by
 * amdgpu_amdkfd_gpuvm_get_vm_fault_info().
 */
struct kfd_vm_fault_info {
51+
uint64_t page_addr; /* fault address value as read from the VM_CONTEXT1_PROTECTION_FAULT_ADDR register */
52+
uint32_t vmid; /* VMID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS */
53+
uint32_t mc_id; /* MEMORY_CLIENT_ID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS */
54+
uint32_t status; /* raw VM_CONTEXT1_PROTECTION_FAULT_STATUS register value */
55+
bool prot_valid; /* true if any of PROTECTIONS bits 0x7 is set */
56+
bool prot_read; /* true if PROTECTIONS bit 0x8 is set */
57+
bool prot_write; /* true if PROTECTIONS bit 0x10 is set */
58+
bool prot_exec; /* true if PROTECTIONS bit 0x20 is set */
59+
};
60+
5061
struct kfd_cu_info {
5162
uint32_t num_shader_engines;
5263
uint32_t num_shader_arrays_per_engine;
@@ -259,6 +270,12 @@ struct tile_config {
259270
* IB to the corresponding ring (ring type). The IB is executed with the
260271
* specified VMID in a user mode context.
261272
*
273+
* @get_vm_fault_info: Return information about a recent VM fault on
274+
* GFXv7 and v8. If multiple VM faults occurred since the last call of
275+
* this function, it will return information about the first of those
276+
* faults. On GFXv9 VM fault information is fully contained in the IH
277+
* packet and this function is not needed.
278+
*
262279
* This structure contains function pointers to services that the kgd driver
263280
* provides to amdkfd driver.
264281
*
@@ -374,6 +391,9 @@ struct kfd2kgd_calls {
374391
int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine,
375392
uint32_t vmid, uint64_t gpu_addr,
376393
uint32_t *ib_cmd, uint32_t ib_len);
394+
395+
int (*get_vm_fault_info)(struct kgd_dev *kgd,
396+
struct kfd_vm_fault_info *info);
377397
};
378398

379399
/**

0 commit comments

Comments
 (0)