Skip to content

Commit ec82b42

Browse files
hakzsam authored and Marge Bot committed
radv: add a missing async compute workaround for Tonga/Iceland
After digging into the PAL code again, I figured out that Tonga and Iceland are both affected by a hw bug related to async compute dispatches. The solution is to change the "threadgroup" dimension mode to the "thread" dimension mode unconditionally.

This should fix a bunch of issues related to RADV_DEBUG=nocompute on these GPUs.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7551
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6334
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/4679
Cc: mesa-stable
Signed-off-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26207>
1 parent f695a9f commit ec82b42

File tree

3 files changed

+24
-1
lines changed

3 files changed

+24
-1
lines changed

src/amd/common/ac_gpu_info.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,13 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
12451245
info->family == CHIP_BONAIRE ||
12461246
info->family == CHIP_KABINI;
12471247

1248+
/* HW bug workaround with async compute dispatches when threadgroup > 4096.
1249+
* The workaround is to change the "threadgroup" dimension mode to "thread"
1250+
* dimension mode.
1251+
*/
1252+
info->has_async_compute_threadgroup_bug = info->family == CHIP_ICELAND ||
1253+
info->family == CHIP_TONGA;
1254+
12481255
/* Support for GFX10.3 was added with F32_ME_FEATURE_VERSION_31 but the
12491256
* feature version wasn't bumped.
12501257
*/

src/amd/common/ac_gpu_info.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ struct radeon_info {
101101
bool has_two_planes_iterate256_bug;
102102
bool has_vgt_flush_ngg_legacy_bug;
103103
bool has_cs_regalloc_hang_bug;
104+
bool has_async_compute_threadgroup_bug;
104105
bool has_32bit_predication;
105106
bool has_3d_cube_border_color_mipmap;
106107
bool has_image_opcodes;

src/amd/vulkan/radv_cmd_buffer.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9550,11 +9550,11 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
95509550
radeon_emit(cs, dispatch_initiator);
95519551
}
95529552
} else {
9553+
const unsigned *cs_block_size = compute_shader->info.cs.block_size;
95539554
unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
95549555
unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
95559556

95569557
if (info->unaligned) {
9557-
const unsigned *cs_block_size = compute_shader->info.cs.block_size;
95589558
unsigned remainder[3];
95599559

95609560
/* If aligned, these should be an entire block size,
@@ -9619,6 +9619,21 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv
96199619
predicating = false;
96209620
}
96219621

9622+
if (cmd_buffer->device->physical_device->rad_info.has_async_compute_threadgroup_bug &&
9623+
cmd_buffer->qf == RADV_QUEUE_COMPUTE) {
9624+
for (unsigned i = 0; i < 3; i++) {
9625+
if (info->unaligned) {
9626+
/* info->blocks is already in thread dimensions for unaligned dispatches. */
9627+
blocks[i] = info->blocks[i];
9628+
} else {
9629+
/* Force the async compute dispatch to be in "thread" dim mode to workaround a hw bug. */
9630+
blocks[i] *= cs_block_size[i];
9631+
}
9632+
9633+
dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
9634+
}
9635+
}
9636+
96229637
radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
96239638
radeon_emit(cs, blocks[0]);
96249639
radeon_emit(cs, blocks[1]);

0 commit comments

Comments
 (0)