From 7b8d7fd4a0b818d12bf8de432fff50f7f4101a98 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Mon, 6 Oct 2025 20:54:18 +0000 Subject: [PATCH 01/18] Load tuned multi-lora kernel configs from json files --- .../NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json | 15074 ++++++++++++ .../NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json | 15074 ++++++++++++ .../configs/NVIDIA_H100_80GB_HBM3_SHRINK.json | 19430 ++++++++++++++++ vllm/lora/ops/triton_ops/lora_expand_op.py | 21 +- vllm/lora/ops/triton_ops/lora_shrink_op.py | 23 +- vllm/lora/ops/triton_ops/utils.py | 124 + 6 files changed, 49730 insertions(+), 16 deletions(-) create mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json create mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json create mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json new file mode 100644 index 000000000000..67cf5e468c48 --- /dev/null +++ b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json @@ -0,0 +1,15074 @@ +{ + "1": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 256, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + } + }, + "4": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 1024, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + } + }, + "8": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 1024, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + } + } +} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json new file mode 100644 index 000000000000..58fa47dbc64f --- /dev/null +++ b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json @@ -0,0 +1,15074 @@ +{ + "1": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + } + }, + "4": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 128, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + } + }, + "8": { + "1": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 1024, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 16, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 16, + "block_n": 512, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "16": { + "1024": { + "block_m": 16, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "32": { + "16": { + "1024": { + "block_m": 32, + "block_n": 64, + "block_k": 16, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "16": { + "1024": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 16, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "16": { + "1024": { + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 64, + "block_n": 32, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "16": { + "1024": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "16": { + "1024": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 32, + "block_n": 256, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "16": { + "1024": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "2048": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "4096": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "8192": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + }, + "12288": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "16384": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "24576": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + }, + "32768": { + "block_m": 128, + "block_n": 64, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + } + } +} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json new file mode 100644 index 000000000000..20edd283b2c3 --- /dev/null +++ b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json @@ -0,0 +1,19430 @@ +{ + "1": { + "1": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + } + }, + "4": { + "1": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 512, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + } + } + }, + "8": { + "1": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "2": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 512, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + }, + "3": { + "1": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 128, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "16": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 32, + "split_k": 64, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "32": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "64": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "128": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "192": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "256": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 256, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "320": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "384": { + "1024": { + "16": { + "block_m": 16, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "448": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 32, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "512": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "640": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "768": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "896": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 32, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "1024": { + "1024": { + "16": { + "block_m": 32, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "2048": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "3072": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "4096": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "5120": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "6144": { + "1024": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 256, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + }, + "7168": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + } + }, + "8192": { + "1024": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "2048": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "4096": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "8192": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 32, + "split_k": 8, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "12288": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "16384": { + "16": { + "block_m": 64, + "block_n": 16, + "block_k": 64, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 4, + "max_nreg": null + } + }, + "24576": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + }, + "32768": { + "16": { + "block_m": 128, + "block_n": 16, + "block_k": 128, + "split_k": 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": null + } + } + } + } + } +} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py index a7a552b9903d..485cf16c42bd 100644 --- a/vllm/lora/ops/triton_ops/lora_expand_op.py +++ b/vllm/lora/ops/triton_ops/lora_expand_op.py @@ -10,7 +10,7 @@ import torch from vllm.lora.ops.triton_ops.kernel_utils import do_expand_kernel -from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr +from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr, get_v1_op_configs from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -201,12 +201,19 @@ def _lora_expand( NUM_SLICES = len(lora_b_weights) # Triton kernel configs. - BLOCK_M = 64 - BLOCK_N = 128 - BLOCK_K = 16 - NUM_WARPS = 4 - NUM_CTAS = 1 - NUM_STAGES = 2 + kernel_config = get_v1_op_configs(op_type="expand", + max_loras=MAX_LORAS, + batch=M, + hidden_size=MAX_N, + rank=K, + num_slices=NUM_SLICES, + add_inputs=add_inputs) + BLOCK_M = kernel_config['block_m'] + BLOCK_N = kernel_config['block_n'] + BLOCK_K = kernel_config['block_k'] + NUM_WARPS = kernel_config['num_warps'] + NUM_CTAS = kernel_config['num_ctas'] + NUM_STAGES = kernel_config['num_stages'] EVEN_K = K % BLOCK_K == 0 # type: ignore diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index 1e7e43e30de7..93b311cae1c0 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -10,7 +10,7 @@ import torch from vllm.lora.ops.triton_ops.kernel_utils import do_shrink_kernel -from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr +from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr, get_v1_op_configs from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -177,14 +177,19 @@ def _lora_shrink( MAX_LORAS = lora_ids.size(0) # Triton kernel configs - BLOCK_M = 32 - BLOCK_N = 16 - BLOCK_K = 256 if M < 128 else 32 - SPLIT_K = 64 if M < 128 else 8 - NUM_WARPS = 4 - NUM_CTAS = 1 - NUM_STAGES = 2 - + kernel_config = get_v1_op_configs("shrink", + max_loras=MAX_LORAS, + batch=M, + hidden_size=K, + rank=N, + num_slices=NUM_SLICES) + BLOCK_M = kernel_config['block_m'] + BLOCK_N = kernel_config['block_n'] + BLOCK_K = kernel_config['block_k'] + SPLIT_K = kernel_config['split_k'] + NUM_WARPS = kernel_config['num_warps'] + NUM_STAGES = kernel_config['num_stages'] + NUM_CTAS = kernel_config['num_ctas'] EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 3a3e8fc8931e..489da926b2aa 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -2,6 +2,13 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import torch +import functools +import json +import os +from pathlib import Path +from typing import Any, Dict, Optional +from vllm.logger import init_logger +logger = init_logger(__name__) _LORA_A_PTR_DICT: dict[tuple[int, ...], tuple[torch.tensor, ...]] = {} _LORA_B_PTR_DICT: dict[tuple[int, ...], tuple[torch.tensor, ...]] = {} @@ -133,3 +140,120 @@ def _get_lora_b_ptr( MAX_N, ) return _LORA_B_PTR_DICT.get(key) + +@functools.lru_cache +def load_v1_op_config(op_type: str, + add_inputs: Optional[bool]) -> Optional[Dict]: + gpu_name = torch.cuda.get_device_name() + gpu_name = gpu_name.replace(' ', '_') + gpu_name = gpu_name.replace('-', '_') + + config_fname = None + if op_type == "shrink": + config_fname = f"{gpu_name}_{op_type.upper()}.json" + else: + assert op_type == "expand" or op_type == "fused" + config_fname = (f"{gpu_name}_" + f"{op_type.upper()}_" + f"{str(add_inputs).upper()}.json") + + config_path = Path( + f'{os.path.dirname(os.path.realpath(__file__))}/configs/{config_fname}' + ) + if not config_path.exists(): + logger.warning_once(f"Didn't find config path {config_path}, using default lora kernel configs") + return None + + # Load json + logger.warning_once(f"Using LoRA configs from {config_path}.") + config_data = None + with open(str(config_path)) as f: + config_data = json.load(f) + return config_data + + +@functools.lru_cache +def get_v1_op_configs( + op_type: str, + max_loras: int, + batch: int, + hidden_size: int, + rank: int, + num_slices: int, + out_dim: Optional[int] = None, + add_inputs: Optional[bool] = None) -> dict[str, Optional[int]]: + + assert op_type in ["shrink", "expand"] + + # default config + default = {} + if op_type == "shrink": + default = { + 'block_m': 32, + 'block_n': 16, + 'block_k': 256 if batch < 128 else 32, + 'split_k': 64 if batch < 128 else 8, + 'num_warps': 4, + 'num_ctas': 1, + 'num_stages': 2, + 'max_nreg': None + } + else: + default = { + 'block_m': 64, + 'block_n': 128, + 'block_k': 16, + 'num_warps': 4, + 'num_ctas': 1, + 'num_stages': 2, + 'max_nreg': None + } + + m = batch + if op_type == "shrink": + k, n = (hidden_size, rank) + elif op_type == "expand": + k, n = (rank, hidden_size) + else: + k, n, r = (hidden_size, out_dim, rank) + + config_data: Any + config_data = load_v1_op_config(op_type, add_inputs) + if not config_data: + return default + if op_type == "fused": + config_data = config_data.get(str(max_loras)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - max_loras))] + # slice by num_slices + config_data = config_data[str(num_slices)] + # slice by m + config_data = config_data.get(str(m)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - m))] + # slice by k + config_data = config_data.get(str(k)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - k))] + # slice by n + config_data = config_data.get(str(n)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - n))] + config_data = config_data.get(str(n)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - r))] + + else: + # config is structured as config_data[max_loras][num_slices][m][k][n] = {} + # slice by max_loras + config_data = config_data.get(str(max_loras)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - max_loras))] + # slice by num_slices + config_data = config_data[str(num_slices)] + # slice by m + config_data = config_data.get(str(m)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - m))] + # slice by k + config_data = config_data.get(str(k)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - k))] + # slice by n + config_data = config_data.get(str(n)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - n))] + + assert config_data is not None + return config_data \ No newline at end of file From 568c28830487dd5fd011dd613aa120ae719458ce Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Mon, 6 Oct 2025 21:18:13 +0000 Subject: [PATCH 02/18] code clean --- vllm/lora/ops/triton_ops/utils.py | 48 ++++++++++--------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 489da926b2aa..d521746f9251 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -152,7 +152,7 @@ def load_v1_op_config(op_type: str, if op_type == "shrink": config_fname = f"{gpu_name}_{op_type.upper()}.json" else: - assert op_type == "expand" or op_type == "fused" + assert op_type == "expand" config_fname = (f"{gpu_name}_" f"{op_type.upper()}_" f"{str(add_inputs).upper()}.json") @@ -221,39 +221,21 @@ def get_v1_op_configs( config_data = load_v1_op_config(op_type, add_inputs) if not config_data: return default - if op_type == "fused": - config_data = config_data.get(str(max_loras)) or config_data[min( + # config is structured as config_data[max_loras][num_slices][m][k][n] = {} + # slice by max_loras + config_data = config_data.get(str(max_loras)) or config_data[min( config_data.keys(), key=lambda x: abs(int(x) - max_loras))] - # slice by num_slices - config_data = config_data[str(num_slices)] - # slice by m - config_data = config_data.get(str(m)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - m))] - # slice by k - config_data = config_data.get(str(k)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - k))] - # slice by n - config_data = config_data.get(str(n)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - n))] - config_data = config_data.get(str(n)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - r))] - - else: - # config is structured as config_data[max_loras][num_slices][m][k][n] = {} - # slice by max_loras - config_data = config_data.get(str(max_loras)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - max_loras))] - # slice by num_slices - config_data = config_data[str(num_slices)] - # slice by m - config_data = config_data.get(str(m)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - m))] - # slice by k - config_data = config_data.get(str(k)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - k))] - # slice by n - config_data = config_data.get(str(n)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - n))] + # slice by num_slices + config_data = config_data[str(num_slices)] + # slice by m + config_data = config_data.get(str(m)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - m))] + # slice by k + config_data = config_data.get(str(k)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - k))] + # slice by n + config_data = config_data.get(str(n)) or config_data[min( + config_data.keys(), key=lambda x: abs(int(x) - n))] assert config_data is not None return config_data \ No newline at end of file From 16b1d30c8b99307144632acfa2fb6ee0f8e8bf09 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Mon, 6 Oct 2025 21:21:29 +0000 Subject: [PATCH 03/18] clean --- vllm/lora/ops/triton_ops/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index d521746f9251..39bd0cd14363 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -212,10 +212,8 @@ def get_v1_op_configs( m = batch if op_type == "shrink": k, n = (hidden_size, rank) - elif op_type == "expand": - k, n = (rank, hidden_size) else: - k, n, r = (hidden_size, out_dim, rank) + k, n = (rank, hidden_size) config_data: Any config_data = load_v1_op_config(op_type, add_inputs) From bc0f7c702ae6df34e07e7c9f2c4c92e76b24230b Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 8 Oct 2025 23:14:09 +0000 Subject: [PATCH 04/18] using ENV to pass in config folder path --- vllm/lora/ops/triton_ops/utils.py | 63 +++++++++++++++++-------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 39bd0cd14363..0b5c727c016d 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -7,6 +7,7 @@ import os from pathlib import Path from typing import Any, Dict, Optional +from vllm import envs from vllm.logger import init_logger logger = init_logger(__name__) @@ -141,34 +142,40 @@ def _get_lora_b_ptr( ) return _LORA_B_PTR_DICT.get(key) + @functools.lru_cache def load_v1_op_config(op_type: str, add_inputs: Optional[bool]) -> Optional[Dict]: - gpu_name = torch.cuda.get_device_name() - gpu_name = gpu_name.replace(' ', '_') - gpu_name = gpu_name.replace('-', '_') + + user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER + if user_defined_config_folder is not None: + gpu_name = torch.cuda.get_device_name() + gpu_name = gpu_name.replace(' ', '_') + gpu_name = gpu_name.replace('-', '_') + + config_fname = None + if op_type == "shrink": + config_fname = f"{gpu_name}_{op_type.upper()}.json" + else: + assert op_type == "expand" + config_fname = (f"{gpu_name}_" + f"{op_type.upper()}_" + f"{str(add_inputs).upper()}.json") - config_fname = None - if op_type == "shrink": - config_fname = f"{gpu_name}_{op_type.upper()}.json" + config_path = Path( + f'{user_defined_config_folder}/{config_fname}' + ) + if not config_path.exists(): + logger.warning_once(f"No LoRA kernel config files found in the provided folder") + return None + + # Load json + logger.info_once(f"Using tuned LoRA kernel configs from {config_path}.") + with open(str(config_path)) as f: + config_data = json.load(f) else: - assert op_type == "expand" - config_fname = (f"{gpu_name}_" - f"{op_type.upper()}_" - f"{str(add_inputs).upper()}.json") - - config_path = Path( - f'{os.path.dirname(os.path.realpath(__file__))}/configs/{config_fname}' - ) - if not config_path.exists(): - logger.warning_once(f"Didn't find config path {config_path}, using default lora kernel configs") - return None - - # Load json - logger.warning_once(f"Using LoRA configs from {config_path}.") - config_data = None - with open(str(config_path)) as f: - config_data = json.load(f) + config_data = None + return config_data @@ -180,7 +187,6 @@ def get_v1_op_configs( hidden_size: int, rank: int, num_slices: int, - out_dim: Optional[int] = None, add_inputs: Optional[bool] = None) -> dict[str, Optional[int]]: assert op_type in ["shrink", "expand"] @@ -208,17 +214,16 @@ def get_v1_op_configs( 'num_stages': 2, 'max_nreg': None } - m = batch - if op_type == "shrink": - k, n = (hidden_size, rank) - else: - k, n = (rank, hidden_size) + + k, n = (hidden_size, rank) if op_type == "shrink" else (rank, hidden_size) config_data: Any config_data = load_v1_op_config(op_type, add_inputs) if not config_data: + logger.warning_once(f"Using default LoRA kernel configs") return default + # config is structured as config_data[max_loras][num_slices][m][k][n] = {} # slice by max_loras config_data = config_data.get(str(max_loras)) or config_data[min( From 7dc30b1da014a3b580c787335356c9c868ed9bcc Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Fri, 10 Oct 2025 00:49:27 +0000 Subject: [PATCH 05/18] add README --- vllm/lora/ops/triton_ops/README_TUNING.md | 29 +++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 vllm/lora/ops/triton_ops/README_TUNING.md diff --git a/vllm/lora/ops/triton_ops/README_TUNING.md b/vllm/lora/ops/triton_ops/README_TUNING.md new file mode 100644 index 000000000000..a572a9659c7d --- /dev/null +++ b/vllm/lora/ops/triton_ops/README_TUNING.md @@ -0,0 +1,29 @@ +# Multi-LoRA Tuning + +**Note**: The lora config folder should be passed in by export VLLM_TUNED_CONFIG_FOLDER=/path/to/configs. Without it, the kernel would use default configs + +## Tuning Process +Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from [Triton MoE tuning](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_moe.py). An example searching space: + +``` + block_m_range = [16, 32, 64, 128, 256] + block_n_range = [32, 64, 128, 256] + block_k_range = [32, 64, 128, 256] + num_warps_range = [4, 8] + num_stage_range = [2, 3, 4, 5] + num_ctas_range = [1] + split_k_range = [4, 8, 16, 32, 64] +``` +Specifically for multi-lora, `num_slices = [1,2,3]` requires to be tuned sperately for different `MNK` shapes for both shrink and expand kernels. + +## Config Files +### File Name + +For `shrink`, the config file is named as `{gpu_name}_SHRINK.json`, e.g. `NVIDIA_H200_SHRINK.json`. + +For `expand`, the config fileis named as `{gpu_name}_EXPAND_{add_input}.json`, e.g. `NVIDIA_H200_EXPAND_TRUE.json`. + +`gpu_name` can be automatically detected by calling `torch.cuda.get_device_name()` + +### Json Structure +Optimial kernel config files are saved in Json file with a structure as `config_data[max_loras][num_slices][m][k][n]` From 450ec16d7ae73ec74cfbe091600a671bb8aa4787 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Fri, 10 Oct 2025 00:52:19 +0000 Subject: [PATCH 06/18] nit --- vllm/lora/ops/triton_ops/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 0b5c727c016d..7cab783f9fc7 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -166,7 +166,7 @@ def load_v1_op_config(op_type: str, f'{user_defined_config_folder}/{config_fname}' ) if not config_path.exists(): - logger.warning_once(f"No LoRA kernel config files found in the provided folder") + logger.warning_once(f"No LoRA kernel configs founded in {config_path}") return None # Load json From 1e57c365028324f41efead38c6d839f0afc8fc70 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Tue, 14 Oct 2025 19:01:37 +0000 Subject: [PATCH 07/18] modify README --- vllm/lora/ops/triton_ops/README_TUNING.md | 30 +++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/vllm/lora/ops/triton_ops/README_TUNING.md b/vllm/lora/ops/triton_ops/README_TUNING.md index a572a9659c7d..20218bbef22e 100644 --- a/vllm/lora/ops/triton_ops/README_TUNING.md +++ b/vllm/lora/ops/triton_ops/README_TUNING.md @@ -1,9 +1,12 @@ # Multi-LoRA Tuning -**Note**: The lora config folder should be passed in by export VLLM_TUNED_CONFIG_FOLDER=/path/to/configs. Without it, the kernel would use default configs +**Note**: The LoRA configuration folder should be specified by exporting `VLLM_TUNED_CONFIG_FOLDER=/path/to/configs`. Without this, the shrink/expand kernels will use default configurations. ## Tuning Process -Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from [Triton MoE tuning](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_moe.py). An example searching space: +Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from [Triton MoE tuning](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_moe.py). + +**Step 1** +Define the searching space. An example searching space: ``` block_m_range = [16, 32, 64, 128, 256] @@ -14,7 +17,24 @@ Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from num_ctas_range = [1] split_k_range = [4, 8, 16, 32, 64] ``` -Specifically for multi-lora, `num_slices = [1,2,3]` requires to be tuned sperately for different `MNK` shapes for both shrink and expand kernels. +**Step 2** +Get all hidden_state sizes and num_slices that the target model uses for a specific TP size. + +For example, we can aquire those info by simply checking [add_lora_linear](https://github.com/li2haipeng/vllm/blob/multi_lora_v01011/vllm/lora/punica_wrapper/punica_gpu.py#L192): + +``` +print(f"x_shape: {x.view(-1, x.shape[-1]).shape}") +print(f"num_sclises: {len(output_slices)}") +for i in range(len(output_slices)): + print(f"a{i} shape: {lora_a_stacked[i].shape}") + print(f"b{i} shape: {lora_b_stacked[i].shape}") + +print("y_shape", y.shape) +``` + +**Step 3** +Benchmark the shrink/expand kernel runtime with different kernel configurations generated from the pre-defined search space by performing a grid search to find the optimal kernel configuration. vLLM's [benchmark_lora.py](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_lora.py) can be used to search for configurations for different shapes. + ## Config Files ### File Name @@ -23,7 +43,7 @@ For `shrink`, the config file is named as `{gpu_name}_SHRINK.json`, e.g. `NVIDIA For `expand`, the config fileis named as `{gpu_name}_EXPAND_{add_input}.json`, e.g. `NVIDIA_H200_EXPAND_TRUE.json`. -`gpu_name` can be automatically detected by calling `torch.cuda.get_device_name()` +The `gpu_name` can be automatically detected by calling `torch.cuda.get_device_name()` ### Json Structure -Optimial kernel config files are saved in Json file with a structure as `config_data[max_loras][num_slices][m][k][n]` +Optimal kernel configuration files are saved as JSON files with the structure `config_data[max_loras][num_slices][m][k][n]` From ffb7e8fcda2be036c703c51005bdc1c2483039d7 Mon Sep 17 00:00:00 2001 From: li2haipeng <44383182+li2haipeng@users.noreply.github.com> Date: Tue, 14 Oct 2025 21:03:47 -0700 Subject: [PATCH 08/18] Update vllm/lora/ops/triton_ops/utils.py Co-authored-by: Jee Jee Li Signed-off-by: li2haipeng <44383182+li2haipeng@users.noreply.github.com> --- vllm/lora/ops/triton_ops/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 7cab783f9fc7..35800cd5ca97 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -180,7 +180,7 @@ def load_v1_op_config(op_type: str, @functools.lru_cache -def get_v1_op_configs( +def get_lora_op_configs( op_type: str, max_loras: int, batch: int, From b4926d466ef30bda1963040611b60a4c9de6ef69 Mon Sep 17 00:00:00 2001 From: li2haipeng <44383182+li2haipeng@users.noreply.github.com> Date: Tue, 14 Oct 2025 21:05:35 -0700 Subject: [PATCH 09/18] Update vllm/lora/ops/triton_ops/utils.py Co-authored-by: Jee Jee Li Signed-off-by: li2haipeng <44383182+li2haipeng@users.noreply.github.com> --- vllm/lora/ops/triton_ops/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 35800cd5ca97..c2ebc3a7288e 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -144,7 +144,7 @@ def _get_lora_b_ptr( @functools.lru_cache -def load_v1_op_config(op_type: str, +def load_lora_op_config(op_type: str, add_inputs: Optional[bool]) -> Optional[Dict]: user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER From aca76b2fa8c30e05588e5caee5ca1044b227f0ff Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 15 Oct 2025 04:12:18 +0000 Subject: [PATCH 10/18] remove /configs --- .../NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json | 15074 ------------ .../NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json | 15074 ------------ .../configs/NVIDIA_H100_80GB_HBM3_SHRINK.json | 19430 ---------------- vllm/lora/ops/triton_ops/utils.py | 2 +- 4 files changed, 1 insertion(+), 49579 deletions(-) delete mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json delete mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json delete mode 100644 vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json deleted file mode 100644 index 67cf5e468c48..000000000000 --- a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_FALSE.json +++ /dev/null @@ -1,15074 +0,0 @@ -{ - "1": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 256, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - } - }, - "4": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 1024, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - } - }, - "8": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 1024, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - } - } -} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json deleted file mode 100644 index 58fa47dbc64f..000000000000 --- a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_EXPAND_TRUE.json +++ /dev/null @@ -1,15074 +0,0 @@ -{ - "1": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - } - }, - "4": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 128, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - } - }, - "8": { - "1": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 1024, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 16, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 16, - "block_n": 512, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "16": { - "1024": { - "block_m": 16, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "32": { - "16": { - "1024": { - "block_m": 32, - "block_n": 64, - "block_k": 16, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "16": { - "1024": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 16, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "16": { - "1024": { - "block_m": 64, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 64, - "block_n": 32, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "16": { - "1024": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "16": { - "1024": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 128, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 32, - "block_n": 256, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "16": { - "1024": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "2048": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "4096": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "8192": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - }, - "12288": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "16384": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "24576": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - }, - "32768": { - "block_m": 128, - "block_n": 64, - "block_k": 16, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - } - } -} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json b/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json deleted file mode 100644 index 20edd283b2c3..000000000000 --- a/vllm/lora/ops/triton_ops/configs/NVIDIA_H100_80GB_HBM3_SHRINK.json +++ /dev/null @@ -1,19430 +0,0 @@ -{ - "1": { - "1": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - } - }, - "4": { - "1": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 512, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - } - } - }, - "8": { - "1": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "2": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 512, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - }, - "3": { - "1": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 128, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "16": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 32, - "split_k": 64, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "32": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "64": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "128": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "192": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "256": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 256, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "320": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "384": { - "1024": { - "16": { - "block_m": 16, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "448": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 32, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "512": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "640": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "768": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "896": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 32, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "1024": { - "1024": { - "16": { - "block_m": 32, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "2048": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "3072": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "4096": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "5120": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "6144": { - "1024": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 256, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - }, - "7168": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - } - }, - "8192": { - "1024": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "2048": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "4096": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "8192": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 32, - "split_k": 8, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "12288": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "16384": { - "16": { - "block_m": 64, - "block_n": 16, - "block_k": 64, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 4, - "max_nreg": null - } - }, - "24576": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - }, - "32768": { - "16": { - "block_m": 128, - "block_n": 16, - "block_k": 128, - "split_k": 8, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 2, - "max_nreg": null - } - } - } - } - } -} \ No newline at end of file diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index c2ebc3a7288e..7fa7034308c6 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -219,7 +219,7 @@ def get_lora_op_configs( k, n = (hidden_size, rank) if op_type == "shrink" else (rank, hidden_size) config_data: Any - config_data = load_v1_op_config(op_type, add_inputs) + config_data = load_lora_op_config(op_type, add_inputs) if not config_data: logger.warning_once(f"Using default LoRA kernel configs") return default From 25182d4a9d14075bf1d2260e9a9ad00952aef2b7 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 15 Oct 2025 04:33:09 +0000 Subject: [PATCH 11/18] pre-commit fix Signed-off-by: Haipeng Li --- vllm/lora/ops/triton_ops/README_TUNING.md | 21 +++++++++++++-------- vllm/lora/ops/triton_ops/lora_expand_op.py | 4 ++-- vllm/lora/ops/triton_ops/lora_shrink_op.py | 4 ++-- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/vllm/lora/ops/triton_ops/README_TUNING.md b/vllm/lora/ops/triton_ops/README_TUNING.md index 20218bbef22e..8c5473ad25a2 100644 --- a/vllm/lora/ops/triton_ops/README_TUNING.md +++ b/vllm/lora/ops/triton_ops/README_TUNING.md @@ -3,12 +3,14 @@ **Note**: The LoRA configuration folder should be specified by exporting `VLLM_TUNED_CONFIG_FOLDER=/path/to/configs`. Without this, the shrink/expand kernels will use default configurations. ## Tuning Process -Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from [Triton MoE tuning](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_moe.py). + +Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from [Triton MoE tuning](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_moe.py). **Step 1** Define the searching space. An example searching space: -``` +```python + block_m_range = [16, 32, 64, 128, 256] block_n_range = [32, 64, 128, 256] block_k_range = [32, 64, 128, 256] @@ -16,34 +18,37 @@ Define the searching space. An example searching space: num_stage_range = [2, 3, 4, 5] num_ctas_range = [1] split_k_range = [4, 8, 16, 32, 64] + ``` **Step 2** -Get all hidden_state sizes and num_slices that the target model uses for a specific TP size. +Get all hidden_state sizes and num_slices that the target model uses for a specific TP size. For example, we can aquire those info by simply checking [add_lora_linear](https://github.com/li2haipeng/vllm/blob/multi_lora_v01011/vllm/lora/punica_wrapper/punica_gpu.py#L192): -``` +```python + print(f"x_shape: {x.view(-1, x.shape[-1]).shape}") print(f"num_sclises: {len(output_slices)}") for i in range(len(output_slices)): print(f"a{i} shape: {lora_a_stacked[i].shape}") print(f"b{i} shape: {lora_b_stacked[i].shape}") - print("y_shape", y.shape) + ``` **Step 3** Benchmark the shrink/expand kernel runtime with different kernel configurations generated from the pre-defined search space by performing a grid search to find the optimal kernel configuration. vLLM's [benchmark_lora.py](https://github.com/vllm-project/vllm/blob/main/benchmarks/kernels/benchmark_lora.py) can be used to search for configurations for different shapes. - ## Config Files + ### File Name -For `shrink`, the config file is named as `{gpu_name}_SHRINK.json`, e.g. `NVIDIA_H200_SHRINK.json`. +For `shrink`, the config file is named as `{gpu_name}_SHRINK.json`, e.g. `NVIDIA_H200_SHRINK.json`. For `expand`, the config fileis named as `{gpu_name}_EXPAND_{add_input}.json`, e.g. `NVIDIA_H200_EXPAND_TRUE.json`. -The `gpu_name` can be automatically detected by calling `torch.cuda.get_device_name()` +The `gpu_name` can be automatically detected by calling `torch.cuda.get_device_name()` ### Json Structure + Optimal kernel configuration files are saved as JSON files with the structure `config_data[max_loras][num_slices][m][k][n]` diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py index 485cf16c42bd..c1bcb700f0d5 100644 --- a/vllm/lora/ops/triton_ops/lora_expand_op.py +++ b/vllm/lora/ops/triton_ops/lora_expand_op.py @@ -10,7 +10,7 @@ import torch from vllm.lora.ops.triton_ops.kernel_utils import do_expand_kernel -from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr, get_v1_op_configs +from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr, get_lora_op_configs from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -201,7 +201,7 @@ def _lora_expand( NUM_SLICES = len(lora_b_weights) # Triton kernel configs. - kernel_config = get_v1_op_configs(op_type="expand", + kernel_config = get_lora_op_configs(op_type="expand", max_loras=MAX_LORAS, batch=M, hidden_size=MAX_N, diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index 93b311cae1c0..ed1954d1e102 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -10,7 +10,7 @@ import torch from vllm.lora.ops.triton_ops.kernel_utils import do_shrink_kernel -from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr, get_v1_op_configs +from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr, get_lora_op_configs from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -177,7 +177,7 @@ def _lora_shrink( MAX_LORAS = lora_ids.size(0) # Triton kernel configs - kernel_config = get_v1_op_configs("shrink", + kernel_config = get_lora_op_configs("shrink", max_loras=MAX_LORAS, batch=M, hidden_size=K, From 2a59623aac3edd117e6737b1b81916e759ef0aec Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 15 Oct 2025 04:39:48 +0000 Subject: [PATCH 12/18] fix Signed-off-by: Haipeng Li --- vllm/lora/ops/triton_ops/README_TUNING.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/vllm/lora/ops/triton_ops/README_TUNING.md b/vllm/lora/ops/triton_ops/README_TUNING.md index 8c5473ad25a2..fda95ea71891 100644 --- a/vllm/lora/ops/triton_ops/README_TUNING.md +++ b/vllm/lora/ops/triton_ops/README_TUNING.md @@ -10,30 +10,27 @@ Multi-lora shrink/expand Triton kernel tuning follows a similar methodology from Define the searching space. An example searching space: ```python - - block_m_range = [16, 32, 64, 128, 256] - block_n_range = [32, 64, 128, 256] - block_k_range = [32, 64, 128, 256] - num_warps_range = [4, 8] - num_stage_range = [2, 3, 4, 5] - num_ctas_range = [1] - split_k_range = [4, 8, 16, 32, 64] - +block_m_range = [16, 32, 64, 128, 256] +block_n_range = [32, 64, 128, 256] +block_k_range = [32, 64, 128, 256] +num_warps_range = [4, 8] +num_stage_range = [2, 3, 4, 5] +num_ctas_range = [1] +split_k_range = [4, 8, 16, 32, 64] ``` + **Step 2** Get all hidden_state sizes and num_slices that the target model uses for a specific TP size. For example, we can aquire those info by simply checking [add_lora_linear](https://github.com/li2haipeng/vllm/blob/multi_lora_v01011/vllm/lora/punica_wrapper/punica_gpu.py#L192): ```python - print(f"x_shape: {x.view(-1, x.shape[-1]).shape}") print(f"num_sclises: {len(output_slices)}") for i in range(len(output_slices)): print(f"a{i} shape: {lora_a_stacked[i].shape}") print(f"b{i} shape: {lora_b_stacked[i].shape}") print("y_shape", y.shape) - ``` **Step 3** From 9e4c1ac8f408f0c6fa98c9c576b24a0d454b291d Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 15 Oct 2025 04:44:43 +0000 Subject: [PATCH 13/18] fix --- vllm/lora/ops/triton_ops/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 7fa7034308c6..ae751248ce4a 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -241,4 +241,4 @@ def get_lora_op_configs( config_data.keys(), key=lambda x: abs(int(x) - n))] assert config_data is not None - return config_data \ No newline at end of file + return config_data From fbc9e10eea520a3f885756963e001a3feef28e96 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 15 Oct 2025 05:00:57 +0000 Subject: [PATCH 14/18] ruff format fix --- vllm/lora/ops/triton_ops/lora_expand_op.py | 28 +++--- vllm/lora/ops/triton_ops/lora_shrink_op.py | 28 +++--- vllm/lora/ops/triton_ops/utils.py | 102 +++++++++++---------- 3 files changed, 84 insertions(+), 74 deletions(-) diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py index c1bcb700f0d5..c8330455985a 100644 --- a/vllm/lora/ops/triton_ops/lora_expand_op.py +++ b/vllm/lora/ops/triton_ops/lora_expand_op.py @@ -201,19 +201,21 @@ def _lora_expand( NUM_SLICES = len(lora_b_weights) # Triton kernel configs. - kernel_config = get_lora_op_configs(op_type="expand", - max_loras=MAX_LORAS, - batch=M, - hidden_size=MAX_N, - rank=K, - num_slices=NUM_SLICES, - add_inputs=add_inputs) - BLOCK_M = kernel_config['block_m'] - BLOCK_N = kernel_config['block_n'] - BLOCK_K = kernel_config['block_k'] - NUM_WARPS = kernel_config['num_warps'] - NUM_CTAS = kernel_config['num_ctas'] - NUM_STAGES = kernel_config['num_stages'] + kernel_config = get_lora_op_configs( + op_type="expand", + max_loras=MAX_LORAS, + batch=M, + hidden_size=MAX_N, + rank=K, + num_slices=NUM_SLICES, + add_inputs=add_inputs, + ) + BLOCK_M = kernel_config["block_m"] + BLOCK_N = kernel_config["block_n"] + BLOCK_K = kernel_config["block_k"] + NUM_WARPS = kernel_config["num_warps"] + NUM_CTAS = kernel_config["num_ctas"] + NUM_STAGES = kernel_config["num_stages"] EVEN_K = K % BLOCK_K == 0 # type: ignore diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index ed1954d1e102..9cba8f494448 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -177,19 +177,21 @@ def _lora_shrink( MAX_LORAS = lora_ids.size(0) # Triton kernel configs - kernel_config = get_lora_op_configs("shrink", - max_loras=MAX_LORAS, - batch=M, - hidden_size=K, - rank=N, - num_slices=NUM_SLICES) - BLOCK_M = kernel_config['block_m'] - BLOCK_N = kernel_config['block_n'] - BLOCK_K = kernel_config['block_k'] - SPLIT_K = kernel_config['split_k'] - NUM_WARPS = kernel_config['num_warps'] - NUM_STAGES = kernel_config['num_stages'] - NUM_CTAS = kernel_config['num_ctas'] + kernel_config = get_lora_op_configs( + "shrink", + max_loras=MAX_LORAS, + batch=M, + hidden_size=K, + rank=N, + num_slices=NUM_SLICES, + ) + BLOCK_M = kernel_config["block_m"] + BLOCK_N = kernel_config["block_n"] + BLOCK_K = kernel_config["block_k"] + SPLIT_K = kernel_config["split_k"] + NUM_WARPS = kernel_config["num_warps"] + NUM_STAGES = kernel_config["num_stages"] + NUM_CTAS = kernel_config["num_ctas"] EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index ae751248ce4a..9ffb6dc3d85e 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -1,14 +1,16 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import torch import functools import json -import os from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any + +import torch + from vllm import envs from vllm.logger import init_logger + logger = init_logger(__name__) _LORA_A_PTR_DICT: dict[tuple[int, ...], tuple[torch.tensor, ...]] = {} @@ -144,27 +146,23 @@ def _get_lora_b_ptr( @functools.lru_cache -def load_lora_op_config(op_type: str, - add_inputs: Optional[bool]) -> Optional[Dict]: - +def load_lora_op_config(op_type: str, add_inputs: bool | None) -> dict | None: user_defined_config_folder = envs.VLLM_TUNED_CONFIG_FOLDER if user_defined_config_folder is not None: gpu_name = torch.cuda.get_device_name() - gpu_name = gpu_name.replace(' ', '_') - gpu_name = gpu_name.replace('-', '_') + gpu_name = gpu_name.replace(" ", "_") + gpu_name = gpu_name.replace("-", "_") config_fname = None if op_type == "shrink": config_fname = f"{gpu_name}_{op_type.upper()}.json" else: assert op_type == "expand" - config_fname = (f"{gpu_name}_" - f"{op_type.upper()}_" - f"{str(add_inputs).upper()}.json") + config_fname = ( + f"{gpu_name}_{op_type.upper()}_{str(add_inputs).upper()}.json" + ) - config_path = Path( - f'{user_defined_config_folder}/{config_fname}' - ) + config_path = Path(f"{user_defined_config_folder}/{config_fname}") if not config_path.exists(): logger.warning_once(f"No LoRA kernel configs founded in {config_path}") return None @@ -175,44 +173,44 @@ def load_lora_op_config(op_type: str, config_data = json.load(f) else: config_data = None - + return config_data @functools.lru_cache def get_lora_op_configs( - op_type: str, - max_loras: int, - batch: int, - hidden_size: int, - rank: int, - num_slices: int, - add_inputs: Optional[bool] = None) -> dict[str, Optional[int]]: - + op_type: str, + max_loras: int, + batch: int, + hidden_size: int, + rank: int, + num_slices: int, + add_inputs: bool | None = None, +) -> dict[str, int | None]: assert op_type in ["shrink", "expand"] # default config default = {} if op_type == "shrink": default = { - 'block_m': 32, - 'block_n': 16, - 'block_k': 256 if batch < 128 else 32, - 'split_k': 64 if batch < 128 else 8, - 'num_warps': 4, - 'num_ctas': 1, - 'num_stages': 2, - 'max_nreg': None + "block_m": 32, + "block_n": 16, + "block_k": 256 if batch < 128 else 32, + "split_k": 64 if batch < 128 else 8, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": None, } else: default = { - 'block_m': 64, - 'block_n': 128, - 'block_k': 16, - 'num_warps': 4, - 'num_ctas': 1, - 'num_stages': 2, - 'max_nreg': None + "block_m": 64, + "block_n": 128, + "block_k": 16, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2, + "max_nreg": None, } m = batch @@ -221,24 +219,32 @@ def get_lora_op_configs( config_data: Any config_data = load_lora_op_config(op_type, add_inputs) if not config_data: - logger.warning_once(f"Using default LoRA kernel configs") + logger.warning_once("Using default LoRA kernel configs") return default - + # config is structured as config_data[max_loras][num_slices][m][k][n] = {} # slice by max_loras - config_data = config_data.get(str(max_loras)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - max_loras))] + config_data = ( + config_data.get(str(max_loras)) + or config_data[min(config_data.keys(), key=lambda x: abs(int(x) - max_loras))] + ) # slice by num_slices config_data = config_data[str(num_slices)] # slice by m - config_data = config_data.get(str(m)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - m))] + config_data = ( + config_data.get(str(m)) + or config_data[min(config_data.keys(), key=lambda x: abs(int(x) - m))] + ) # slice by k - config_data = config_data.get(str(k)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - k))] + config_data = ( + config_data.get(str(k)) + or config_data[min(config_data.keys(), key=lambda x: abs(int(x) - k))] + ) # slice by n - config_data = config_data.get(str(n)) or config_data[min( - config_data.keys(), key=lambda x: abs(int(x) - n))] + config_data = ( + config_data.get(str(n)) + or config_data[min(config_data.keys(), key=lambda x: abs(int(x) - n))] + ) assert config_data is not None return config_data From d1fc1a144c4c804075797bcc432acbe28d95d83a Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Tue, 28 Oct 2025 22:11:56 +0000 Subject: [PATCH 15/18] init --- vllm/lora/ops/triton_ops/lora_shrink_op.py | 15 +++++++++++++-- vllm/lora/ops/triton_ops/utils.py | 1 + 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index 8d126197f83e..b88f0ea73343 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -41,6 +41,7 @@ def _lora_shrink_kernel( BLOCK_K: tl.constexpr, EVEN_K: tl.constexpr, SPLIT_K: tl.constexpr, + GROUP_SIZE_M: tl.constexpr, SLICE_NUM: tl.constexpr, ): cta_n_num = tl.cdiv(N, BLOCK_N) @@ -48,8 +49,16 @@ def _lora_shrink_kernel( pid_sk_m_n = tl.program_id(axis=0) pid_sk = pid_sk_m_n % SPLIT_K - pid_m = (pid_sk_m_n // SPLIT_K) % cta_m_num - pid_n = pid_sk_m_n // (SPLIT_K * cta_m_num) % cta_n_num + + pid_m_n = pid_sk_m_n // SPLIT_K + num_pid_in_group = GROUP_SIZE_M * cta_n_num + group_id = pid_m_n // num_pid_in_group + first_pid_m = group_id * GROUP_SIZE_M + group_size_m = min(cta_m_num - first_pid_m, GROUP_SIZE_M) + + # Column-major ordering within groups for better cache reuse + pid_m = first_pid_m + ((pid_m_n % num_pid_in_group) % group_size_m) + pid_n = (pid_m_n % num_pid_in_group) // group_size_m slice_id = tl.program_id(axis=1) lora_idx = tl.program_id(axis=2) @@ -194,6 +203,7 @@ def _lora_shrink( NUM_WARPS = kernel_config["num_warps"] NUM_STAGES = kernel_config["num_stages"] NUM_CTAS = kernel_config["num_ctas"] + GROUP_SIZE_M = kernel_config['group_size_m'] EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the @@ -233,6 +243,7 @@ def _lora_shrink( BLOCK_K, EVEN_K, SPLIT_K, + GROUP_SIZE_M, NUM_SLICES, num_warps=NUM_WARPS, num_ctas=NUM_CTAS, diff --git a/vllm/lora/ops/triton_ops/utils.py b/vllm/lora/ops/triton_ops/utils.py index 9ffb6dc3d85e..368c5037d2e4 100644 --- a/vllm/lora/ops/triton_ops/utils.py +++ b/vllm/lora/ops/triton_ops/utils.py @@ -199,6 +199,7 @@ def get_lora_op_configs( "split_k": 64 if batch < 128 else 8, "num_warps": 4, "num_ctas": 1, + "group_size_m": 8, "num_stages": 2, "max_nreg": None, } From aca6e837515ce40d090223ba79dd82e09b2b90f7 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Tue, 28 Oct 2025 23:03:14 +0000 Subject: [PATCH 16/18] fix --- vllm/lora/ops/triton_ops/lora_shrink_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index b88f0ea73343..534ad6af17b2 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -203,7 +203,7 @@ def _lora_shrink( NUM_WARPS = kernel_config["num_warps"] NUM_STAGES = kernel_config["num_stages"] NUM_CTAS = kernel_config["num_ctas"] - GROUP_SIZE_M = kernel_config['group_size_m'] + GROUP_SIZE_M = kernel_config["group_size_m"] EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the From c23bf5ffd14bc0657b1eb5e9604ccf0ada98241e Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Wed, 29 Oct 2025 21:47:27 +0000 Subject: [PATCH 17/18] add default --- vllm/lora/ops/triton_ops/lora_shrink_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index 534ad6af17b2..b0ab301880f0 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -203,7 +203,7 @@ def _lora_shrink( NUM_WARPS = kernel_config["num_warps"] NUM_STAGES = kernel_config["num_stages"] NUM_CTAS = kernel_config["num_ctas"] - GROUP_SIZE_M = kernel_config["group_size_m"] + GROUP_SIZE_M = kernel_config.get('group_size_m', 8) EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the From 34e0ac41b672f92b74d02a910d570b92d0e2d2e8 Mon Sep 17 00:00:00 2001 From: Haipeng Li Date: Thu, 30 Oct 2025 16:59:49 +0000 Subject: [PATCH 18/18] ruff format --- vllm/lora/ops/triton_ops/lora_shrink_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index b0ab301880f0..adc5c9dce5e8 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -203,7 +203,7 @@ def _lora_shrink( NUM_WARPS = kernel_config["num_warps"] NUM_STAGES = kernel_config["num_stages"] NUM_CTAS = kernel_config["num_ctas"] - GROUP_SIZE_M = kernel_config.get('group_size_m', 8) + GROUP_SIZE_M = kernel_config.get("group_size_m", 8) EVEN_K = K % (BLOCK_K * SPLIT_K) == 0 # type: ignore # TODO (varun): This grid formulation maximizes parallelization at the