Skip to content

Commit e4937f3

Browse files
2045gemini authored and gregkh committed
drm/amdgpu: use atomic functions with memory barriers for vm fault info
commit 6df8e84 upstream.

The atomic variable vm_fault_info_updated is used to synchronize access to adev->gmc.vm_fault_info between the interrupt handler and get_vm_fault_info().

The default atomic functions like atomic_set() and atomic_read() do not provide memory barriers. This allows for CPU instruction reordering, meaning the memory accesses to vm_fault_info and the vm_fault_info_updated flag are not guaranteed to occur in the intended order. This creates a race condition that can lead to inconsistent or stale data being used.

The previous implementation, which used an explicit mb(), was incomplete and inefficient. It failed to account for all potential CPU reorderings, such as the access of vm_fault_info being reordered before the atomic_read() of the flag. The explicit-barrier approach is also more verbose and less performant than using the proper atomic functions with acquire/release semantics.

Fix this by switching to atomic_set_release() and atomic_read_acquire(). These functions provide the necessary acquire and release semantics, which act as memory barriers to ensure the correct order of operations. Using them is also more efficient and idiomatic than using explicit full memory barriers.

Fixes: b97dfa2 ("drm/amdgpu: save vm fault information for amdkfd")
Cc: [email protected]
Signed-off-by: Gui-Dong Han <[email protected]>
Signed-off-by: Felix Kuehling <[email protected]>
Reviewed-by: Felix Kuehling <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent e5e3eb2 commit e4937f3

File tree

3 files changed

+8
-11
lines changed

3 files changed

+8
-11
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2353,10 +2353,9 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem)
23532353
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev,
23542354
struct kfd_vm_fault_info *mem)
23552355
{
2356-
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
2356+
if (atomic_read_acquire(&adev->gmc.vm_fault_info_updated) == 1) {
23572357
*mem = *adev->gmc.vm_fault_info;
2358-
mb(); /* make sure read happened */
2359-
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
2358+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
23602359
}
23612360
return 0;
23622361
}

drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1055,7 +1055,7 @@ static int gmc_v7_0_sw_init(void *handle)
10551055
GFP_KERNEL);
10561056
if (!adev->gmc.vm_fault_info)
10571057
return -ENOMEM;
1058-
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1058+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
10591059

10601060
return 0;
10611061
}
@@ -1287,7 +1287,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
12871287
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
12881288
VMID);
12891289
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
1290-
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
1290+
&& !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
12911291
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
12921292
u32 protections = REG_GET_FIELD(status,
12931293
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1303,8 +1303,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
13031303
info->prot_read = protections & 0x8 ? true : false;
13041304
info->prot_write = protections & 0x10 ? true : false;
13051305
info->prot_exec = protections & 0x20 ? true : false;
1306-
mb();
1307-
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
1306+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
13081307
}
13091308

13101309
return 0;

drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1168,7 +1168,7 @@ static int gmc_v8_0_sw_init(void *handle)
11681168
GFP_KERNEL);
11691169
if (!adev->gmc.vm_fault_info)
11701170
return -ENOMEM;
1171-
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
1171+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 0);
11721172

11731173
return 0;
11741174
}
@@ -1468,7 +1468,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
14681468
vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
14691469
VMID);
14701470
if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
1471-
&& !atomic_read(&adev->gmc.vm_fault_info_updated)) {
1471+
&& !atomic_read_acquire(&adev->gmc.vm_fault_info_updated)) {
14721472
struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
14731473
u32 protections = REG_GET_FIELD(status,
14741474
VM_CONTEXT1_PROTECTION_FAULT_STATUS,
@@ -1484,8 +1484,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
14841484
info->prot_read = protections & 0x8 ? true : false;
14851485
info->prot_write = protections & 0x10 ? true : false;
14861486
info->prot_exec = protections & 0x20 ? true : false;
1487-
mb();
1488-
atomic_set(&adev->gmc.vm_fault_info_updated, 1);
1487+
atomic_set_release(&adev->gmc.vm_fault_info_updated, 1);
14891488
}
14901489

14911490
return 0;

0 commit comments

Comments
 (0)