From 27454c1328095bba8db0afb6db0fcb0fc9c9b452 Mon Sep 17 00:00:00 2001 From: Kumar Date: Thu, 30 Oct 2025 10:38:55 +0530 Subject: [PATCH 01/13] Add gtests for compiler CI for faster testing --- test/ck_tile/CMakeLists.txt | 1 + test/ck_tile/compiler/CMakeLists.txt | 154 +++++++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 test/ck_tile/compiler/CMakeLists.txt diff --git a/test/ck_tile/CMakeLists.txt b/test/ck_tile/CMakeLists.txt index d58c80377a..b553b46977 100644 --- a/test/ck_tile/CMakeLists.txt +++ b/test/ck_tile/CMakeLists.txt @@ -33,3 +33,4 @@ add_subdirectory(atomic_add_op) add_subdirectory(fmha) add_subdirectory(gemm_tile_engine) add_subdirectory(pooling) +add_subdirectory(compiler) diff --git a/test/ck_tile/compiler/CMakeLists.txt b/test/ck_tile/compiler/CMakeLists.txt new file mode 100644 index 0000000000..55c5fe7443 --- /dev/null +++ b/test/ck_tile/compiler/CMakeLists.txt @@ -0,0 +1,154 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. + +# ================================================================================ +# CK Tile Compiler CI Test Suite +# ================================================================================= + +# This test suite contains a curated set of CK Tile tests specifically designed for compiler +# CI nodes. The goal is to provide comprehensive coverage while completing within 20-30 minutes + +set(COMPILER_TEST_COMPILE_OPTIONS) +if(CK_USE_OCP_FP8) + list(APPEND COMPILER_TEST_COMPILE_OPTIONS -DCK_USE_OCP_FP8) +endif() + +# ================================================================================ +# Basic GEMM Functionality Tests - Core functionality +# ================================================================================ +if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12") + add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp16_compiler ../gemm/test_gemm_pipeline_basic_fp16.cpp) + target_compile_options(test_ck_tile_gemm_pipeline_basic_fp16_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_gemm_pipeline_basic_fp16_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +# =============================================================================== + +if(GPU_TARGETS MATCHES "gfx94|gfx95|") + # Test basic tests for MI300 series + add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp8_compiler ../gemm/test_gemm_pipeline_basic_fp8.cpp) + target_compile_options(test_ck_tile_gemm_pipeline_basic_fp8_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_gemm_pipeline_basic_fp8_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +# ================================================================================ +# Memory Pipeline Tests - Single test for memory operations +# ================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_gemm_pipeline_mem_compiler ../gemm/test_gemm_pipeline_mem.cpp) + target_compile_options(test_ck_tile_gemm_pipeline_mem_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_gemm_pipeline_mem_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +if(GPU_TARGETS MATCHES "gfx11|gfx12") + # mem_wmma for Radeon GPU's + add_gtest_executable(test_ck_tile_gemm_pipeline_mem_wmma_compiler ../gemm/test_gemm_pipeline_mem_wmma.cpp) + target_compile_options(test_ck_tile_gemm_pipeline_mem_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_gemm_pipeline_mem_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Basic Batched GEMM Functionality tests +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_batched_gemm_compiler ../batched_gemm/test_batched_gemm.cpp) + target_compile_options(test_ck_tile_batched_gemm_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_batched_gemm_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Grouped GEMM - Multi stream execution +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_grouped_gemm_compiler ../grouped_gemm/test_grouped_gemm.cpp) + + set_tests_properties(test_ck_tile_grouped_gemm_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Reduce Operations - Important for Normalization +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_reduce2d_compiler ../reduce/test_reduce2d.cpp) + if(result EQUAL 0) + target_link_libraries(test_ck_tile_reduce2d_compiler PRIVATE utility) + endif() + set_tests_properties(test_ck_tile_reduce2d_compiler PROPERTIES LABELS "COMPILER_CI") +endif() + +#================================================================================ +# Data Type Tests - Quantization & Precision +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_pk_int4_compiler ../data_type/test_pk_int4.cpp) + set_tests_properties(test_ck_tile_pk_int4_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Epilogue Tests - Post Gemm operations +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_cshuffle_epilogue_compiler ../epilogue/test_cshuffle_epilogue.cpp) + set_tests_properties(test_ck_tile_cshuffle_epilogue_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# ElementWise Operations - Basic element operations +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_elementwise_1d_compiler ../elementwise/test_elementwise_1d.cpp) + set_tests_properties(test_ck_tile_elementwise_1d_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +# =============================================================================== +# Container Tests - Core Infrastructure +# =============================================================================== + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_tuple_apply_compiler ../container/test_tuple_apply.cpp) + if(result EQUAL 0) + target_link_libraries(test_ck_tile_tuple_apply_compiler PRIVATE utility) + endif() + set_tests_properties(test_ck_tile_tuple_apply_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Atomic Operations - Synchronization primitives +#================================================================================ + +add_gtest_executable(test_atomic_compiler ../atomic_add_op/test_atomic.cpp) +set_tests_properties(test_atomic_compiler PROPERTIES LABELS "COMPILE_CI") + +# ============================================================================== +# Batched Transpose - Memory Layout transformation +# ============================================================================== + +if(GPU_TARGETS MATCHES "gfx9") + add_gtest_executable(test_ck_tile_batched_transpose_compiler ../batched_transpose/test_batched_transpose.cpp) + set_tests_properties(test_ck_tile_batched_transpose_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Image to Column - Conv Preprocessing +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_tile_image_to_column_compiler ../image_to_column/test_tile_image_to_column.cpp) + set_tests_properties(test_tile_image_to_column_compiler PROPERTIES LABELS "COMPILE_CI") +endif() + +#================================================================================ +# Pooling - Important for CNN Operations +#================================================================================ + +if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") + add_gtest_executable(test_ck_tile_pooling_compiler ../pooling/test_pooling.cpp) + set_tests_properties(test_ck_tile_pooling_compiler PROPERTIES LABELS "COMPILE_CI") +endif() From eb8b0fcade1b55d6a46720476960a8d1b4431cf5 Mon Sep 17 00:00:00 2001 From: Kumar Date: Thu, 30 Oct 2025 12:12:32 +0530 Subject: [PATCH 02/13] Add changes to have a custom target --- test/ck_tile/compiler/CMakeLists.txt | 64 +++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 6 deletions(-) diff --git a/test/ck_tile/compiler/CMakeLists.txt b/test/ck_tile/compiler/CMakeLists.txt index 55c5fe7443..0d88e14bda 100644 --- a/test/ck_tile/compiler/CMakeLists.txt +++ b/test/ck_tile/compiler/CMakeLists.txt @@ -6,7 +6,7 @@ # ================================================================================= # This test suite contains a curated set of CK Tile tests specifically designed for compiler -# CI nodes. The goal is to provide comprehensive coverage while completing within 20-30 minutes +# CI nodes. The goal is to provide comprehensive coverage while completing within 20-30 minutes. set(COMPILER_TEST_COMPILE_OPTIONS) if(CK_USE_OCP_FP8) @@ -24,7 +24,7 @@ endif() # =============================================================================== -if(GPU_TARGETS MATCHES "gfx94|gfx95|") +if(GPU_TARGETS MATCHES "gfx94|gfx95") # Test basic tests for MI300 series add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp8_compiler ../gemm/test_gemm_pipeline_basic_fp8.cpp) target_compile_options(test_ck_tile_gemm_pipeline_basic_fp8_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) @@ -44,8 +44,8 @@ endif() if(GPU_TARGETS MATCHES "gfx11|gfx12") # mem_wmma for Radeon GPU's add_gtest_executable(test_ck_tile_gemm_pipeline_mem_wmma_compiler ../gemm/test_gemm_pipeline_mem_wmma.cpp) - target_compile_options(test_ck_tile_gemm_pipeline_mem_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_gemm_pipeline_mem_compiler PROPERTIES LABELS "COMPILE_CI") + target_compile_options(test_ck_tile_gemm_pipeline_mem_wmma_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) + set_tests_properties(test_ck_tile_gemm_pipeline_mem_wmma_compiler PROPERTIES LABELS "COMPILE_CI") endif() #================================================================================ @@ -64,7 +64,6 @@ endif() if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") add_gtest_executable(test_ck_tile_grouped_gemm_compiler ../grouped_gemm/test_grouped_gemm.cpp) - set_tests_properties(test_ck_tile_grouped_gemm_compiler PROPERTIES LABELS "COMPILE_CI") endif() @@ -77,7 +76,7 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") if(result EQUAL 0) target_link_libraries(test_ck_tile_reduce2d_compiler PRIVATE utility) endif() - set_tests_properties(test_ck_tile_reduce2d_compiler PROPERTIES LABELS "COMPILER_CI") + set_tests_properties(test_ck_tile_reduce2d_compiler PROPERTIES LABELS "COMPILE_CI") endif() #================================================================================ @@ -152,3 +151,56 @@ if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") add_gtest_executable(test_ck_tile_pooling_compiler ../pooling/test_pooling.cpp) set_tests_properties(test_ck_tile_pooling_compiler PROPERTIES LABELS "COMPILE_CI") endif() + +# ================================================================================ +# Custom Target: Build All Compiler Tests +# ================================================================================ + +add_custom_target(compiler_tests) + +# Add dependencies for all compiler test executables +if(TARGET test_ck_tile_gemm_pipeline_basic_fp16_compiler) + add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_basic_fp16_compiler) +endif() +if(TARGET test_ck_tile_gemm_pipeline_basic_fp8_compiler) + add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_basic_fp8_compiler) +endif() +if(TARGET test_ck_tile_gemm_pipeline_mem_compiler) + add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_mem_compiler) +endif() +if(TARGET test_ck_tile_gemm_pipeline_mem_wmma_compiler) + add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_mem_wmma_compiler) +endif() +if(TARGET test_ck_tile_batched_gemm_compiler) + add_dependencies(compiler_tests test_ck_tile_batched_gemm_compiler) +endif() +if(TARGET test_ck_tile_grouped_gemm_compiler) + add_dependencies(compiler_tests test_ck_tile_grouped_gemm_compiler) +endif() +if(TARGET test_ck_tile_reduce2d_compiler) + add_dependencies(compiler_tests test_ck_tile_reduce2d_compiler) +endif() +if(TARGET test_ck_tile_pk_int4_compiler) + add_dependencies(compiler_tests test_ck_tile_pk_int4_compiler) +endif() +if(TARGET test_ck_tile_cshuffle_epilogue_compiler) + add_dependencies(compiler_tests test_ck_tile_cshuffle_epilogue_compiler) +endif() +if(TARGET test_ck_tile_elementwise_1d_compiler) + add_dependencies(compiler_tests test_ck_tile_elementwise_1d_compiler) +endif() +if(TARGET test_ck_tile_tuple_apply_compiler) + add_dependencies(compiler_tests test_ck_tile_tuple_apply_compiler) +endif() +if(TARGET test_atomic_compiler) + add_dependencies(compiler_tests test_atomic_compiler) +endif() +if(TARGET test_ck_tile_batched_transpose_compiler) + add_dependencies(compiler_tests test_ck_tile_batched_transpose_compiler) +endif() +if(TARGET test_tile_image_to_column_compiler) + add_dependencies(compiler_tests test_tile_image_to_column_compiler) +endif() +if(TARGET test_ck_tile_pooling_compiler) + add_dependencies(compiler_tests test_ck_tile_pooling_compiler) +endif() From 4834ba90c720593ad25700886aac9ca20416699e Mon Sep 17 00:00:00 2001 From: Kumar Date: Sat, 1 Nov 2025 01:53:21 +0530 Subject: [PATCH 03/13] Add a gtest suite for gemm kernel for running CI tests with compiler mode --- test/ck_tile/compiler/CMakeLists.txt | 206 ------------------ test/ck_tile/gemm/CMakeLists.txt | 6 + .../gemm/test_gemm_pipeline_compiler.cpp | 130 +++++++++++ 3 files changed, 136 insertions(+), 206 deletions(-) delete mode 100644 test/ck_tile/compiler/CMakeLists.txt create mode 100644 test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp diff --git a/test/ck_tile/compiler/CMakeLists.txt b/test/ck_tile/compiler/CMakeLists.txt deleted file mode 100644 index 0d88e14bda..0000000000 --- a/test/ck_tile/compiler/CMakeLists.txt +++ /dev/null @@ -1,206 +0,0 @@ -# SPDX-License-Identifier: MIT -# Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. - -# ================================================================================ -# CK Tile Compiler CI Test Suite -# ================================================================================= - -# This test suite contains a curated set of CK Tile tests specifically designed for compiler -# CI nodes. The goal is to provide comprehensive coverage while completing within 20-30 minutes. - -set(COMPILER_TEST_COMPILE_OPTIONS) -if(CK_USE_OCP_FP8) - list(APPEND COMPILER_TEST_COMPILE_OPTIONS -DCK_USE_OCP_FP8) -endif() - -# ================================================================================ -# Basic GEMM Functionality Tests - Core functionality -# ================================================================================ -if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12") - add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp16_compiler ../gemm/test_gemm_pipeline_basic_fp16.cpp) - target_compile_options(test_ck_tile_gemm_pipeline_basic_fp16_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_gemm_pipeline_basic_fp16_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -# =============================================================================== - -if(GPU_TARGETS MATCHES "gfx94|gfx95") - # Test basic tests for MI300 series - add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp8_compiler ../gemm/test_gemm_pipeline_basic_fp8.cpp) - target_compile_options(test_ck_tile_gemm_pipeline_basic_fp8_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_gemm_pipeline_basic_fp8_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -# ================================================================================ -# Memory Pipeline Tests - Single test for memory operations -# ================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_gemm_pipeline_mem_compiler ../gemm/test_gemm_pipeline_mem.cpp) - target_compile_options(test_ck_tile_gemm_pipeline_mem_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_gemm_pipeline_mem_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -if(GPU_TARGETS MATCHES "gfx11|gfx12") - # mem_wmma for Radeon GPU's - add_gtest_executable(test_ck_tile_gemm_pipeline_mem_wmma_compiler ../gemm/test_gemm_pipeline_mem_wmma.cpp) - target_compile_options(test_ck_tile_gemm_pipeline_mem_wmma_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_gemm_pipeline_mem_wmma_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Basic Batched GEMM Functionality tests -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_batched_gemm_compiler ../batched_gemm/test_batched_gemm.cpp) - target_compile_options(test_ck_tile_batched_gemm_compiler PRIVATE ${COMPILER_TEST_COMPILE_OPTIONS}) - set_tests_properties(test_ck_tile_batched_gemm_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Grouped GEMM - Multi stream execution -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_grouped_gemm_compiler ../grouped_gemm/test_grouped_gemm.cpp) - set_tests_properties(test_ck_tile_grouped_gemm_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Reduce Operations - Important for Normalization -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_reduce2d_compiler ../reduce/test_reduce2d.cpp) - if(result EQUAL 0) - target_link_libraries(test_ck_tile_reduce2d_compiler PRIVATE utility) - endif() - set_tests_properties(test_ck_tile_reduce2d_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Data Type Tests - Quantization & Precision -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_pk_int4_compiler ../data_type/test_pk_int4.cpp) - set_tests_properties(test_ck_tile_pk_int4_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Epilogue Tests - Post Gemm operations -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_cshuffle_epilogue_compiler ../epilogue/test_cshuffle_epilogue.cpp) - set_tests_properties(test_ck_tile_cshuffle_epilogue_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# ElementWise Operations - Basic element operations -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_elementwise_1d_compiler ../elementwise/test_elementwise_1d.cpp) - set_tests_properties(test_ck_tile_elementwise_1d_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -# =============================================================================== -# Container Tests - Core Infrastructure -# =============================================================================== - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_tuple_apply_compiler ../container/test_tuple_apply.cpp) - if(result EQUAL 0) - target_link_libraries(test_ck_tile_tuple_apply_compiler PRIVATE utility) - endif() - set_tests_properties(test_ck_tile_tuple_apply_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Atomic Operations - Synchronization primitives -#================================================================================ - -add_gtest_executable(test_atomic_compiler ../atomic_add_op/test_atomic.cpp) -set_tests_properties(test_atomic_compiler PROPERTIES LABELS "COMPILE_CI") - -# ============================================================================== -# Batched Transpose - Memory Layout transformation -# ============================================================================== - -if(GPU_TARGETS MATCHES "gfx9") - add_gtest_executable(test_ck_tile_batched_transpose_compiler ../batched_transpose/test_batched_transpose.cpp) - set_tests_properties(test_ck_tile_batched_transpose_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Image to Column - Conv Preprocessing -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_tile_image_to_column_compiler ../image_to_column/test_tile_image_to_column.cpp) - set_tests_properties(test_tile_image_to_column_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -#================================================================================ -# Pooling - Important for CNN Operations -#================================================================================ - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - add_gtest_executable(test_ck_tile_pooling_compiler ../pooling/test_pooling.cpp) - set_tests_properties(test_ck_tile_pooling_compiler PROPERTIES LABELS "COMPILE_CI") -endif() - -# ================================================================================ -# Custom Target: Build All Compiler Tests -# ================================================================================ - -add_custom_target(compiler_tests) - -# Add dependencies for all compiler test executables -if(TARGET test_ck_tile_gemm_pipeline_basic_fp16_compiler) - add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_basic_fp16_compiler) -endif() -if(TARGET test_ck_tile_gemm_pipeline_basic_fp8_compiler) - add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_basic_fp8_compiler) -endif() -if(TARGET test_ck_tile_gemm_pipeline_mem_compiler) - add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_mem_compiler) -endif() -if(TARGET test_ck_tile_gemm_pipeline_mem_wmma_compiler) - add_dependencies(compiler_tests test_ck_tile_gemm_pipeline_mem_wmma_compiler) -endif() -if(TARGET test_ck_tile_batched_gemm_compiler) - add_dependencies(compiler_tests test_ck_tile_batched_gemm_compiler) -endif() -if(TARGET test_ck_tile_grouped_gemm_compiler) - add_dependencies(compiler_tests test_ck_tile_grouped_gemm_compiler) -endif() -if(TARGET test_ck_tile_reduce2d_compiler) - add_dependencies(compiler_tests test_ck_tile_reduce2d_compiler) -endif() -if(TARGET test_ck_tile_pk_int4_compiler) - add_dependencies(compiler_tests test_ck_tile_pk_int4_compiler) -endif() -if(TARGET test_ck_tile_cshuffle_epilogue_compiler) - add_dependencies(compiler_tests test_ck_tile_cshuffle_epilogue_compiler) -endif() -if(TARGET test_ck_tile_elementwise_1d_compiler) - add_dependencies(compiler_tests test_ck_tile_elementwise_1d_compiler) -endif() -if(TARGET test_ck_tile_tuple_apply_compiler) - add_dependencies(compiler_tests test_ck_tile_tuple_apply_compiler) -endif() -if(TARGET test_atomic_compiler) - add_dependencies(compiler_tests test_atomic_compiler) -endif() -if(TARGET test_ck_tile_batched_transpose_compiler) - add_dependencies(compiler_tests test_ck_tile_batched_transpose_compiler) -endif() -if(TARGET test_tile_image_to_column_compiler) - add_dependencies(compiler_tests test_tile_image_to_column_compiler) -endif() -if(TARGET test_ck_tile_pooling_compiler) - add_dependencies(compiler_tests test_ck_tile_pooling_compiler) -endif() diff --git a/test/ck_tile/gemm/CMakeLists.txt b/test/ck_tile/gemm/CMakeLists.txt index 24cc1bc5ab..357484199e 100644 --- a/test/ck_tile/gemm/CMakeLists.txt +++ b/test/ck_tile/gemm/CMakeLists.txt @@ -22,6 +22,12 @@ else() message(DEBUG "Skipping ck_tile_gemm tests for current target") endif() + +if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12") + add_gtest_executable(test_gemm_pipeline_compiler test_gemm_pipeline_compiler.cpp) + target_compile_options(test_gemm_pipeline_compiler PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) +endif() + if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12") add_test_executable(test_ck_tile_gemm_pipeline_universal_fp8 test_gemm_pipeline_universal_fp8.cpp) add_test_executable(test_ck_tile_gemm_pipeline_universal_bf8 test_gemm_pipeline_universal_bf8.cpp) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp new file mode 100644 index 0000000000..1509b8850e --- /dev/null +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2024-2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "test_gemm_pipeline_kernel_types.hpp" +#include "test_gemm_pipeline_util.hpp" +#include "gtest/gtest.h" + +// Concise test suite for compiler validation. +// Covers essential combinations of data types, layouts, and pipeline types. + +template +class TestCkTileGemmCompiler : public TestCkTileGemmPipeline> +{ +}; + +#define TEST_SUITE_NAME TestCkTileGemmCompiler + +using CompilerTestTypes = ::testing::Types< + // ============================================================================ + // KernelTypes with Mem pipeline + // Parameters: ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, + // CDataType, M_BlockSize, N_BlockSize, K_BlockSize, M_TileSize, + // N_TileSize, K_TileSize, Scheduler, PipelineType + // ============================================================================ + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, + + // KernelTypes with WMMA Mem pipeline + std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Interwave, Mem>, + std::tuple< Row, Row, Row, BF16, BF16, F32, BF16, I64, I64, I32, I16, I16, I16, Interwave, Mem>, + std::tuple< Row, Row, Row, BF8, BF8, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, Mem>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, Mem>, + + // KernelTypes with CompV3 pipeline + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, + std::tuple< Row, Col, Row, F8, F8, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, + std::tuple< Col, Col, Row, INT8, INT8, INT32, INT32, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, + std::tuple< Row, Row, Row, F8, F8, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, + + // KernelTypes with CompV3 pipeline (WMMA) + std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, + std::tuple< Row, Row, Row, BF16, BF16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, + std::tuple< Row, Col, Row, BF8, BF8, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, + + // KernelTypes with CompV4 pipeline + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, + std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, + + // KernelTypes with CompV4 pipeline (WMMA) + std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, + std::tuple< Col, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, + + // KernelTypes with CompV6 pipeline + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, + std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, + + // ============================================================================ + // KernelTypes with Persistent CompV3 pipeline + // Additional Parameter: Persistent (Persistent/NonPersistent mode) + // ============================================================================ + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3, Persistent>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3, NonPersistent>, + + // KernelTypes with Persistent CompV3 pipeline (WMMA) + std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3, Persistent>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3, NonPersistent> +>; + +TYPED_TEST_SUITE(TestCkTileGemmCompiler, CompilerTestTypes); + +// ============================================================================ +// Test Cases +// ============================================================================ + +// Test 1: Single tile - validates basic kernel compilation and execution +TYPED_TEST(TEST_SUITE_NAME, SingleTile) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +// Test 2: Small M - validates edge cases with small batch sizes +TYPED_TEST(TEST_SUITE_NAME, SmallM) +{ + std::vector Ms{1, 4}; // Minimal set for compiler check + constexpr int N = 1024; + constexpr int K = 256; + + for(int M : Ms) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } +} + +// Test 3: Regular size - validates typical production workload +TYPED_TEST(TEST_SUITE_NAME, Regular) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + + this->Run(M, N, K); +} + +// Test 4: Padded K - validates handling of non-aligned K dimension +TYPED_TEST(TEST_SUITE_NAME, PaddK) +{ + constexpr int M = 128; + constexpr int N = 1024; + constexpr int K = 432; // Non-aligned K + + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME \ No newline at end of file From 032663ee962eecdad8e6bf26aaeb5b722ec95375 Mon Sep 17 00:00:00 2001 From: Kumar Date: Sat, 1 Nov 2025 01:55:17 +0530 Subject: [PATCH 04/13] Fix Clang error (EOL) --- test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 1509b8850e..5ff549a780 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -127,4 +127,4 @@ TYPED_TEST(TEST_SUITE_NAME, PaddK) this->Run(M, N, K); } -#undef TEST_SUITE_NAME \ No newline at end of file +#undef TEST_SUITE_NAME From ca9e2d18c6282618afef0970136f494e5ec2ad65 Mon Sep 17 00:00:00 2001 From: Kumar Date: Sat, 1 Nov 2025 01:57:28 +0530 Subject: [PATCH 05/13] Removed compiler subfolder from CMake --- test/ck_tile/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/ck_tile/CMakeLists.txt b/test/ck_tile/CMakeLists.txt index b553b46977..d58c80377a 100644 --- a/test/ck_tile/CMakeLists.txt +++ b/test/ck_tile/CMakeLists.txt @@ -33,4 +33,3 @@ add_subdirectory(atomic_add_op) add_subdirectory(fmha) add_subdirectory(gemm_tile_engine) add_subdirectory(pooling) -add_subdirectory(compiler) From 9a171a4c46a618e64350debc598348c77801d570 Mon Sep 17 00:00:00 2001 From: Kumar Date: Sun, 2 Nov 2025 01:29:38 +0530 Subject: [PATCH 06/13] Add gtest suite for gemm kernel --- .../gemm/test_gemm_pipeline_compiler.cpp | 902 ++++++++++++++++-- 1 file changed, 829 insertions(+), 73 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 5ff549a780..74d0e534ee 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -5,93 +5,285 @@ #include "test_gemm_pipeline_util.hpp" #include "gtest/gtest.h" -// Concise test suite for compiler validation. -// Covers essential combinations of data types, layouts, and pipeline types. +// ============================================================================ +// Comprehensive GEMM Compiler Validation Test Suite +// This file consolidates all GEMM pipeline tests for compiler validation +// Covers essential combinations of data types, layouts, and pipeline types +// ============================================================================ + +// ---------------------------------------------------------------------------- +// Test Class Definitions for Different Pipeline Types +// ---------------------------------------------------------------------------- + +template +class TestGemmMem : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmMemWmma : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmCompV3 : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmCompV3Wmma : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmCompV4 : public TestCkTileGemmPipeline> +{ +}; template -class TestCkTileGemmCompiler : public TestCkTileGemmPipeline> +class TestGemmCompV4Wmma : public TestCkTileGemmPipeline> { }; -#define TEST_SUITE_NAME TestCkTileGemmCompiler +template +class TestGemmCompV6 : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmPersistent : public TestCkTileGemmPipeline> +{ +}; + +template +class TestGemmPersistentWmma : public TestCkTileGemmPipeline> +{ +}; + +// ---------------------------------------------------------------------------- +// Type Definitions for Each Pipeline Configuration +// ---------------------------------------------------------------------------- -using CompilerTestTypes = ::testing::Types< - // ============================================================================ - // KernelTypes with Mem pipeline +// Memory Pipeline Types +using MemTestTypes = ::testing::Types< // Parameters: ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, // CDataType, M_BlockSize, N_BlockSize, K_BlockSize, M_TileSize, // N_TileSize, K_TileSize, Scheduler, PipelineType - // ============================================================================ - std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, Mem>, - - // KernelTypes with WMMA Mem pipeline - std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Interwave, Mem>, - std::tuple< Row, Row, Row, BF16, BF16, F32, BF16, I64, I64, I32, I16, I16, I16, Interwave, Mem>, - std::tuple< Row, Row, Row, BF8, BF8, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, Mem>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, Mem>, - - // KernelTypes with CompV3 pipeline - std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, - std::tuple< Row, Col, Row, F8, F8, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, - std::tuple< Col, Col, Row, INT8, INT8, INT32, INT32, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, - std::tuple< Row, Row, Row, F8, F8, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3>, - - // KernelTypes with CompV3 pipeline (WMMA) - std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, - std::tuple< Row, Row, Row, BF16, BF16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, - std::tuple< Row, Col, Row, BF8, BF8, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3>, - - // KernelTypes with CompV4 pipeline - std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, - std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I32, I32, I32, I16, Intrawave, CompV4>, - - // KernelTypes with CompV4 pipeline (WMMA) - std::tuple< Row, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, - std::tuple< Col, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV4>, - - // KernelTypes with CompV6 pipeline - std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, - std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV6>, - - // ============================================================================ - // KernelTypes with Persistent CompV3 pipeline + std::tuple, + std::tuple, + std::tuple, + std::tuple>; + +// Memory Pipeline WMMA Types +using MemWmmaTestTypes = ::testing::Types< + std::tuple, + std::tuple>; + +// CompV3 Pipeline Types +using CompV3TestTypes = ::testing::Types< + std:: + tuple, + std::tuple, + std::tuple, + std::tuple>; + +// CompV3 Pipeline WMMA Types +using CompV3WmmaTestTypes = ::testing::Types< + std::tuple, + std::tuple>; + +// CompV4 Pipeline Types +using CompV4TestTypes = ::testing::Types< + std:: + tuple, + std:: + tuple, + std:: + tuple, + std::tuple>; + +// CompV4 Pipeline WMMA Types +using CompV4WmmaTestTypes = ::testing::Types< + std::tuple, + std::tuple, + std::tuple, + std::tuple>; + +// CompV6 Pipeline Types +using CompV6TestTypes = ::testing::Types< + std:: + tuple, + std:: + tuple, + std:: + tuple, + std::tuple>; + +// Persistent CompV3 Pipeline Types +using PersistentTestTypes = ::testing::Types< // Additional Parameter: Persistent (Persistent/NonPersistent mode) - // ============================================================================ - std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3, Persistent>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I64, I32, I32, I16, Intrawave, CompV3, NonPersistent>, + std::tuple, + std::tuple>; + +// Persistent CompV3 Pipeline WMMA Types +using PersistentWmmaTestTypes = ::testing::Types, + std::tuple>; - // KernelTypes with Persistent CompV3 pipeline (WMMA) - std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3, Persistent>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I64, I64, I32, I16, I16, I16, Intrawave, CompV3, NonPersistent> ->; +// ---------------------------------------------------------------------------- +// Test Suite Registrations +// ---------------------------------------------------------------------------- -TYPED_TEST_SUITE(TestCkTileGemmCompiler, CompilerTestTypes); +TYPED_TEST_SUITE(TestGemmMem, MemTestTypes); +TYPED_TEST_SUITE(TestGemmMemWmma, MemWmmaTestTypes); +TYPED_TEST_SUITE(TestGemmCompV3, CompV3TestTypes); +TYPED_TEST_SUITE(TestGemmCompV3Wmma, CompV3WmmaTestTypes); +TYPED_TEST_SUITE(TestGemmCompV4, CompV4TestTypes); +TYPED_TEST_SUITE(TestGemmCompV4Wmma, CompV4WmmaTestTypes); +TYPED_TEST_SUITE(TestGemmCompV6, CompV6TestTypes); +TYPED_TEST_SUITE(TestGemmPersistent, PersistentTestTypes); +TYPED_TEST_SUITE(TestGemmPersistentWmma, PersistentWmmaTestTypes); // ============================================================================ -// Test Cases +// Memory Pipeline Tests (Mem) // ============================================================================ -// Test 1: Single tile - validates basic kernel compilation and execution -TYPED_TEST(TEST_SUITE_NAME, SingleTile) -{ - this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); -} +#define TEST_SUITE_NAME TestGemmMem -// Test 2: Small M - validates edge cases with small batch sizes -TYPED_TEST(TEST_SUITE_NAME, SmallM) +TYPED_TEST(TEST_SUITE_NAME, SmallM_SingleRow) { - std::vector Ms{1, 4}; // Minimal set for compiler check + std::vector Ms{1}; constexpr int N = 1024; - constexpr int K = 256; + constexpr int K = TestFixture::K_Tile * 2; for(int M : Ms) { @@ -107,23 +299,587 @@ TYPED_TEST(TEST_SUITE_NAME, SmallM) } } -// Test 3: Regular size - validates typical production workload -TYPED_TEST(TEST_SUITE_NAME, Regular) +TYPED_TEST(TEST_SUITE_NAME, SingleTile) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, ExactlyTwoTiles_M) +{ + this->Run(TestFixture::M_Tile * 2, TestFixture::N_Tile, TestFixture::K_Tile * 2); +} + +TYPED_TEST(TEST_SUITE_NAME, ExactlyTwoTiles_N) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile * 2, TestFixture::K_Tile * 2); +} + +TYPED_TEST(TEST_SUITE_NAME, ExactlyTwoTiles_K) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile * 2); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_512x1024x512) { constexpr int M = 512; constexpr int N = 1024; constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, Square_1024x1024x1024) +{ + constexpr int M = 1024; + constexpr int N = 1024; + constexpr int K = 1024; + this->Run(M, N, K); +} +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_2048x2048x2048) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, VeryLargeMatrix_4096x4096x4096) +{ + constexpr int M = 4096; + constexpr int N = 4096; + constexpr int K = 4096; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, TallSkinny_4096x128x1024) +{ + constexpr int M = 4096; + constexpr int N = 128; + constexpr int K = 1024; this->Run(M, N, K); } -// Test 4: Padded K - validates handling of non-aligned K dimension -TYPED_TEST(TEST_SUITE_NAME, PaddK) +TYPED_TEST(TEST_SUITE_NAME, ShortWide_128x4096x1024) { constexpr int M = 128; + constexpr int N = 4096; + constexpr int K = 1024; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, DeepNarrow_2048x2048x8192) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 8192; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, StressTest_ExtremelyTallMatrix) +{ + constexpr int M = 16384; + constexpr int N = 64; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, StressTest_ExtremelyWideMatrix) +{ + constexpr int M = 64; + constexpr int N = 16384; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, StressTest_VeryDeepK) +{ + constexpr int M = 1024; + constexpr int N = 1024; + constexpr int K = 16384; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Memory Pipeline Tests with WMMA +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmMemWmma + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_WMMA) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_WMMA) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_WMMA) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Compute V3 Pipeline Tests +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmCompV3 + +TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV3) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; + constexpr int N = 1024; + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV3) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, MidLargeM_CompV3) +{ + std::vector Ms{127, 255, 312, 799, 1573}; + constexpr int N = 1024; + + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + constexpr int VecLoadSize = (std::is_same_v || + std::is_same_v || + std::is_same_v) + ? 16 + : 8; + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + if(M % VecLoadSize == 0) + { + this->Run(M, N, K); + } + else + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_CompV3) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV3) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, BatchedSmall_CompV3) +{ + constexpr int M = 256; + constexpr int N = 256; + constexpr int K = 256; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Compute V3 Pipeline Tests with WMMA +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmCompV3Wmma + +TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV3Wmma) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; + constexpr int N = 1024; + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV3Wmma) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_CompV3Wmma) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV3Wmma) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Compute V4 Pipeline Tests +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmCompV4 + +TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV4) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; + constexpr int N = 1024; + std::vector Ks; + for(auto K_count : {2, 3, 4}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV4) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_CompV4) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV4) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Compute V4 Pipeline Tests with WMMA +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmCompV4Wmma + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV4Wmma) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_CompV4Wmma) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV4Wmma) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Compute V6 Pipeline Tests +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmCompV6 + +TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV6) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; + constexpr int N = 1024; + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV6) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, MidLargeM_CompV6) +{ + std::vector Ms{127, 255, 312, 799, 1573}; + constexpr int N = 1024; + + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + constexpr int VecLoadSize = (std::is_same_v || + std::is_same_v || + std::is_same_v) + ? 16 + : 8; + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + if(M % VecLoadSize == 0) + { + this->Run(M, N, K); + } + else + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_CompV6) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV6) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Persistent Kernel Tests +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmPersistent + +TYPED_TEST(TEST_SUITE_NAME, SmallM_Persistent) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; + constexpr int N = 1024; + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} + +TYPED_TEST(TEST_SUITE_NAME, SingleTile_Persistent) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_Persistent) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_Persistent) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; + this->Run(M, N, K); +} + +#undef TEST_SUITE_NAME + +// ============================================================================ +// Persistent Kernel Tests with WMMA +// ============================================================================ + +#define TEST_SUITE_NAME TestGemmPersistentWmma + +TYPED_TEST(TEST_SUITE_NAME, SmallM_PersistentWmma) +{ + std::vector Ms{1, 2, 3, 4, 5, 6}; constexpr int N = 1024; - constexpr int K = 432; // Non-aligned K + std::vector Ks; + for(auto K_count : {2, 3, 4, 10, 11}) + { + Ks.push_back(K_count * TestFixture::K_Tile); + } + + for(int M : Ms) + { + for(int K : Ks) + { + if constexpr(std::is_same_v) + { + EXPECT_THROW((this->Run(M, N, K)), std::runtime_error); + } + else + { + this->Run(M, N, K); + } + } + } +} +TYPED_TEST(TEST_SUITE_NAME, SingleTile_PersistentWmma) +{ + this->Run(TestFixture::M_Tile, TestFixture::N_Tile, TestFixture::K_Tile); +} + +TYPED_TEST(TEST_SUITE_NAME, Regular_PersistentWmma) +{ + constexpr int M = 512; + constexpr int N = 1024; + constexpr int K = 512; + this->Run(M, N, K); +} + +TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_PersistentWmma) +{ + constexpr int M = 2048; + constexpr int N = 2048; + constexpr int K = 2048; this->Run(M, N, K); } From e2282f64592e0d73b5b064b4d9e51bf1f8d63fe4 Mon Sep 17 00:00:00 2001 From: Kumar Date: Mon, 3 Nov 2025 11:34:36 +0530 Subject: [PATCH 07/13] Disable failed tests --- .../gemm/test_gemm_pipeline_compiler.cpp | 156 ++++++++---------- 1 file changed, 68 insertions(+), 88 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 74d0e534ee..90edfdb0c4 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -66,13 +66,11 @@ class TestGemmPersistentWmma : public TestCkTileGemmPipeline, - std::tuple, - std::tuple, - std::tuple>; + // Parameters: ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType, + // M_BlockSize, N_BlockSize, K_BlockSize, M_TileSize, N_TileSize, K_TileSize, Scheduler, + // PipelineType + + std::tuple>; // Memory Pipeline WMMA Types using MemWmmaTestTypes = ::testing::Types< @@ -80,40 +78,36 @@ using MemWmmaTestTypes = ::testing::Types< std::tuple>; // CompV3 Pipeline Types -using CompV3TestTypes = ::testing::Types< - std:: - tuple, - std::tuple, - std::tuple, - std::tuple>; +using CompV3TestTypes = ::testing::Types, + std::tuple>; // CompV3 Pipeline WMMA Types using CompV3WmmaTestTypes = ::testing::Types< @@ -135,28 +129,21 @@ using CompV3WmmaTestTypes = ::testing::Types< CompV3>>; // CompV4 Pipeline Types -using CompV4TestTypes = ::testing::Types< - std:: - tuple, - std:: - tuple, - std:: - tuple, - std::tuple>; +using CompV4TestTypes = ::testing::Types>; // CompV4 Pipeline WMMA Types using CompV4WmmaTestTypes = ::testing::Types< @@ -166,28 +153,21 @@ using CompV4WmmaTestTypes = ::testing::Types< std::tuple>; // CompV6 Pipeline Types -using CompV6TestTypes = ::testing::Types< - std:: - tuple, - std:: - tuple, - std:: - tuple, - std::tuple>; +using CompV6TestTypes = ::testing::Types>; // Persistent CompV3 Pipeline Types using PersistentTestTypes = ::testing::Types< @@ -267,10 +247,10 @@ TYPED_TEST_SUITE(TestGemmMem, MemTestTypes); TYPED_TEST_SUITE(TestGemmMemWmma, MemWmmaTestTypes); TYPED_TEST_SUITE(TestGemmCompV3, CompV3TestTypes); TYPED_TEST_SUITE(TestGemmCompV3Wmma, CompV3WmmaTestTypes); -TYPED_TEST_SUITE(TestGemmCompV4, CompV4TestTypes); +// TYPED_TEST_SUITE(TestGemmCompV4, CompV4TestTypes); TYPED_TEST_SUITE(TestGemmCompV4Wmma, CompV4WmmaTestTypes); -TYPED_TEST_SUITE(TestGemmCompV6, CompV6TestTypes); -TYPED_TEST_SUITE(TestGemmPersistent, PersistentTestTypes); +// TYPED_TEST_SUITE(TestGemmCompV6, CompV6TestTypes); +// TYPED_TEST_SUITE(TestGemmPersistent, PersistentTestTypes); TYPED_TEST_SUITE(TestGemmPersistentWmma, PersistentWmmaTestTypes); // ============================================================================ From 2699c846fae639172be3d1d4cb1e31722f38110b Mon Sep 17 00:00:00 2001 From: Kumar Date: Mon, 3 Nov 2025 14:32:38 +0530 Subject: [PATCH 08/13] Fix build errors --- .../gemm/test_gemm_pipeline_compiler.cpp | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 90edfdb0c4..7781849142 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -129,21 +129,11 @@ using CompV3WmmaTestTypes = ::testing::Types< CompV3>>; // CompV4 Pipeline Types -using CompV4TestTypes = ::testing::Types>; +using CompV4TestTypes = ::testing::Types< + std::tuple, + std::tuple, + std:: + tuple>; // CompV4 Pipeline WMMA Types using CompV4WmmaTestTypes = ::testing::Types< @@ -179,8 +169,8 @@ using PersistentTestTypes = ::testing::Types< F16, F32, F16, - I256, - I256, + I64, + I64, I64, I32, I32, @@ -188,6 +178,7 @@ using PersistentTestTypes = ::testing::Types< Intrawave, CompV3, Persistent>, + std::tuple Date: Tue, 4 Nov 2025 11:41:20 +0530 Subject: [PATCH 09/13] Resolved PR comments --- .../gemm/test_gemm_pipeline_compiler.cpp | 152 ++++++++++-------- 1 file changed, 83 insertions(+), 69 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 7781849142..25eac0a475 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -70,7 +70,8 @@ using MemTestTypes = ::testing::Types< // M_BlockSize, N_BlockSize, K_BlockSize, M_TileSize, N_TileSize, K_TileSize, Scheduler, // PipelineType - std::tuple>; + std::tuple, + std::tuple>; // Memory Pipeline WMMA Types using MemWmmaTestTypes = ::testing::Types< @@ -78,36 +79,23 @@ using MemWmmaTestTypes = ::testing::Types< std::tuple>; // CompV3 Pipeline Types -using CompV3TestTypes = ::testing::Types, - std::tuple>; +using CompV3TestTypes = ::testing::Types< + std::tuple, + std::tuple>; // CompV3 Pipeline WMMA Types using CompV3WmmaTestTypes = ::testing::Types< @@ -130,34 +118,60 @@ using CompV3WmmaTestTypes = ::testing::Types< // CompV4 Pipeline Types using CompV4TestTypes = ::testing::Types< - std::tuple, - std::tuple, - std:: - tuple>; + std::tuple, + std::tuple>; // CompV4 Pipeline WMMA Types using CompV4WmmaTestTypes = ::testing::Types< std::tuple, - std::tuple, - std::tuple, - std::tuple>; + std::tuple>; // CompV6 Pipeline Types -using CompV6TestTypes = ::testing::Types>; +using CompV6TestTypes = ::testing::Types< + std::tuple, + std::tuple>; // Persistent CompV3 Pipeline Types using PersistentTestTypes = ::testing::Types< @@ -409,10 +423,10 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_WMMA) TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV3) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -441,11 +455,11 @@ TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV3) TYPED_TEST(TEST_SUITE_NAME, MidLargeM_CompV3) { - std::vector Ms{127, 255, 312, 799, 1573}; + std::vector Ms{127, 255}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -514,10 +528,10 @@ TYPED_TEST(TEST_SUITE_NAME, BatchedSmall_CompV3) TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV3Wmma) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -570,10 +584,10 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV3Wmma) TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV4) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -655,10 +669,10 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV4Wmma) TYPED_TEST(TEST_SUITE_NAME, SmallM_CompV6) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -687,11 +701,11 @@ TYPED_TEST(TEST_SUITE_NAME, SingleTile_CompV6) TYPED_TEST(TEST_SUITE_NAME, MidLargeM_CompV6) { - std::vector Ms{127, 255, 312, 799, 1573}; + std::vector Ms{127, 255}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -752,10 +766,10 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV6) TYPED_TEST(TEST_SUITE_NAME, SmallM_Persistent) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } @@ -808,10 +822,10 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_Persistent) TYPED_TEST(TEST_SUITE_NAME, SmallM_PersistentWmma) { - std::vector Ms{1, 2, 3, 4, 5, 6}; + std::vector Ms{1, 2}; constexpr int N = 1024; std::vector Ks; - for(auto K_count : {2, 3, 4, 10, 11}) + for(auto K_count : {2, 4}) { Ks.push_back(K_count * TestFixture::K_Tile); } From dc08aa87f9f6d710ebe7eb373291fe93da578478 Mon Sep 17 00:00:00 2001 From: Kumar Date: Tue, 4 Nov 2025 19:58:46 +0530 Subject: [PATCH 10/13] Update shape for persistent gemm kernel test --- test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 25eac0a475..6674b07969 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -183,8 +183,8 @@ using PersistentTestTypes = ::testing::Types< F16, F32, F16, - I64, - I64, + I256, + I256, I64, I32, I32, @@ -192,7 +192,6 @@ using PersistentTestTypes = ::testing::Types< Intrawave, CompV3, Persistent>, - std::tuple Date: Thu, 6 Nov 2025 12:52:42 +0530 Subject: [PATCH 11/13] Seperated types by H/W archs --- .../gemm/test_gemm_pipeline_compiler.cpp | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 6674b07969..11db6206a0 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -20,30 +20,36 @@ class TestGemmMem : public TestCkTileGemmPipeline> { }; +#if defined(CK_TILE_USE_WMMA) template class TestGemmMemWmma : public TestCkTileGemmPipeline> { }; +#endif template class TestGemmCompV3 : public TestCkTileGemmPipeline> { }; +#if defined(CK_TILE_USE_WMMA) template class TestGemmCompV3Wmma : public TestCkTileGemmPipeline> { }; +#endif template class TestGemmCompV4 : public TestCkTileGemmPipeline> { }; +#if defined(CK_TILE_USE_WMMA) template class TestGemmCompV4Wmma : public TestCkTileGemmPipeline> { }; +#endif template class TestGemmCompV6 : public TestCkTileGemmPipeline> @@ -55,10 +61,12 @@ class TestGemmPersistent : public TestCkTileGemmPipeline class TestGemmPersistentWmma : public TestCkTileGemmPipeline> { }; +#endif // ---------------------------------------------------------------------------- // Type Definitions for Each Pipeline Configuration @@ -73,10 +81,12 @@ using MemTestTypes = ::testing::Types< std::tuple, std::tuple>; +#if defined(CK_TILE_USE_WMMA) // Memory Pipeline WMMA Types using MemWmmaTestTypes = ::testing::Types< std::tuple, std::tuple>; +#endif // CompV3 Pipeline Types using CompV3TestTypes = ::testing::Types< @@ -97,6 +107,7 @@ using CompV3TestTypes = ::testing::Types< Intrawave, CompV3>>; +#if defined(CK_TILE_USE_WMMA) // CompV3 Pipeline WMMA Types using CompV3WmmaTestTypes = ::testing::Types< std::tuple, @@ -115,6 +126,7 @@ using CompV3WmmaTestTypes = ::testing::Types< I16, Intrawave, CompV3>>; +#endif // CompV4 Pipeline Types using CompV4TestTypes = ::testing::Types< @@ -135,6 +147,7 @@ using CompV4TestTypes = ::testing::Types< Intrawave, CompV4>>; +#if defined(CK_TILE_USE_WMMA) // CompV4 Pipeline WMMA Types using CompV4WmmaTestTypes = ::testing::Types< std::tuple, @@ -153,6 +166,7 @@ using CompV4WmmaTestTypes = ::testing::Types< I16, Intrawave, CompV4>>; +#endif // CompV6 Pipeline Types using CompV6TestTypes = ::testing::Types< @@ -209,6 +223,7 @@ using PersistentTestTypes = ::testing::Types< CompV3, NonPersistent>>; +#if defined(CK_TILE_USE_WMMA) // Persistent CompV3 Pipeline WMMA Types using PersistentWmmaTestTypes = ::testing::Types>; +#endif // ---------------------------------------------------------------------------- // Test Suite Registrations // ---------------------------------------------------------------------------- TYPED_TEST_SUITE(TestGemmMem, MemTestTypes); +#if defined(CK_TILE_USE_WMMA) TYPED_TEST_SUITE(TestGemmMemWmma, MemWmmaTestTypes); +#endif TYPED_TEST_SUITE(TestGemmCompV3, CompV3TestTypes); +#if defined(CK_TILE_USE_WMMA) TYPED_TEST_SUITE(TestGemmCompV3Wmma, CompV3WmmaTestTypes); +#endif TYPED_TEST_SUITE(TestGemmCompV4, CompV4TestTypes); +#if defined(CK_TILE_USE_WMMA) TYPED_TEST_SUITE(TestGemmCompV4Wmma, CompV4WmmaTestTypes); +#endif TYPED_TEST_SUITE(TestGemmCompV6, CompV6TestTypes); TYPED_TEST_SUITE(TestGemmPersistent, PersistentTestTypes); +#if defined(CK_TILE_USE_WMMA) TYPED_TEST_SUITE(TestGemmPersistentWmma, PersistentWmmaTestTypes); +#endif // ============================================================================ // Memory Pipeline Tests (Mem) @@ -385,6 +409,7 @@ TYPED_TEST(TEST_SUITE_NAME, StressTest_VeryDeepK) #undef TEST_SUITE_NAME +#if defined(CK_TILE_USE_WMMA) // ============================================================================ // Memory Pipeline Tests with WMMA // ============================================================================ @@ -413,6 +438,7 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_WMMA) } #undef TEST_SUITE_NAME +#endif // CK_TILE_USE_WMMA // ============================================================================ // Compute V3 Pipeline Tests @@ -519,6 +545,7 @@ TYPED_TEST(TEST_SUITE_NAME, BatchedSmall_CompV3) #undef TEST_SUITE_NAME +#if defined(CK_TILE_USE_WMMA) // ============================================================================ // Compute V3 Pipeline Tests with WMMA // ============================================================================ @@ -574,6 +601,7 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV3Wmma) } #undef TEST_SUITE_NAME +#endif // CK_TILE_USE_WMMA // ============================================================================ // Compute V4 Pipeline Tests @@ -631,6 +659,7 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV4) #undef TEST_SUITE_NAME +#if defined(CK_TILE_USE_WMMA) // ============================================================================ // Compute V4 Pipeline Tests with WMMA // ============================================================================ @@ -659,6 +688,7 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_CompV4Wmma) } #undef TEST_SUITE_NAME +#endif // CK_TILE_USE_WMMA // ============================================================================ // Compute V6 Pipeline Tests @@ -813,6 +843,7 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_Persistent) #undef TEST_SUITE_NAME +#if defined(CK_TILE_USE_WMMA) // ============================================================================ // Persistent Kernel Tests with WMMA // ============================================================================ @@ -868,3 +899,4 @@ TYPED_TEST(TEST_SUITE_NAME, LargeMatrix_PersistentWmma) } #undef TEST_SUITE_NAME +#endif // CK_TILE_USE_WMMA From 14b92b77896d88f0334590c1d0f8327ea0aaec8d Mon Sep 17 00:00:00 2001 From: Kumar Date: Thu, 6 Nov 2025 17:06:43 +0530 Subject: [PATCH 12/13] Made changes to persistent types --- test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 11db6206a0..9e29a31f97 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -200,8 +200,8 @@ using PersistentTestTypes = ::testing::Types< I256, I256, I64, - I32, - I32, + I16, + I16, I16, Intrawave, CompV3, @@ -216,8 +216,8 @@ using PersistentTestTypes = ::testing::Types< I256, I256, I64, - I32, - I32, + I16, + I16, I16, Intrawave, CompV3, From 03883a9bf06d434b220c20d195508930351a7ed8 Mon Sep 17 00:00:00 2001 From: Kumar Date: Fri, 7 Nov 2025 18:24:51 +0530 Subject: [PATCH 13/13] Fix persistent build failure issue --- .../gemm/test_gemm_pipeline_compiler.cpp | 66 +++++++++---------- 1 file changed, 32 insertions(+), 34 deletions(-) diff --git a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp index 9e29a31f97..bf39e0b552 100644 --- a/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp +++ b/test/ck_tile/gemm/test_gemm_pipeline_compiler.cpp @@ -188,40 +188,38 @@ using CompV6TestTypes = ::testing::Types< CompV6>>; // Persistent CompV3 Pipeline Types -using PersistentTestTypes = ::testing::Types< - // Additional Parameter: Persistent (Persistent/NonPersistent mode) - std::tuple, - std::tuple>; +using PersistentTestTypes = ::testing::Types, + std::tuple>; #if defined(CK_TILE_USE_WMMA) // Persistent CompV3 Pipeline WMMA Types