Commit 15de5ff

[Feature] Disallow FlashMLA on Blackwell (#24521)
Signed-off-by: yewentao256 <[email protected]>
Signed-off-by: Wentao Ye <[email protected]>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent b8a9307 commit 15de5ff

2 files changed: +22 -0 lines changed


vllm/attention/backends/flashmla.py

Lines changed: 11 additions & 0 deletions
@@ -17,6 +17,7 @@
 from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
                                          get_mla_metadata,
                                          is_flashmla_supported)
+from vllm.platforms.cuda import CudaPlatform
 
 
 class FlashMLABackend(MLACommonBackend):
@@ -181,6 +182,16 @@ def __init__(
         assert is_flashmla_supported(), \
             "FlashMLA is not supported on this device"
 
+        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
+        # context:
+        # https://github.com/deepseek-ai/FlashMLA/issues/83
+        # https://github.com/vllm-project/vllm/issues/24513
+        if CudaPlatform.has_device_capability(100):
+            raise NotImplementedError(
+                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
+                "Please use CUTLASS_MLA or TRITON_MLA instead. "
+                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
             raise NotImplementedError(

vllm/v1/attention/backends/mla/flashmla.py

Lines changed: 11 additions & 0 deletions
@@ -12,6 +12,7 @@
                                          is_flashmla_supported)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
+from vllm.platforms.cuda import CudaPlatform
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                     MLACommonDecodeMetadata,
                                                     MLACommonImpl,
@@ -158,6 +159,16 @@ def __init__(
         assert is_flashmla_supported(), \
             "FlashMLA is not supported on this device"
 
+        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
+        # context:
+        # https://github.com/deepseek-ai/FlashMLA/issues/83
+        # https://github.com/vllm-project/vllm/issues/24513
+        if CudaPlatform.has_device_capability(100):
+            raise NotImplementedError(
+                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
+                "Please use CUTLASS_MLA or TRITON_MLA instead. "
+                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
             raise NotImplementedError(
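
Both files add the same guard. For users who hit the new NotImplementedError on Blackwell hardware, a minimal workaround sketch follows; it is not part of this commit and assumes the VLLM_ATTENTION_BACKEND environment variable named in the error message is honored by the installed vLLM build.

# Workaround sketch (assumption: VLLM_ATTENTION_BACKEND is read at engine
# startup, as suggested by the error message above). Select a non-FlashMLA
# MLA backend before constructing the engine.
import os

os.environ["VLLM_ATTENTION_BACKEND"] = "CUTLASS_MLA"  # or "TRITON_MLA"

# ...then import vllm and construct the engine as usual.

The shell equivalent is the command given in the error message itself: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`.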
