2 files changed: +22 -0

First file:
@@ -17,6 +17,7 @@
 from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
                                          get_mla_metadata,
                                          is_flashmla_supported)
+from vllm.platforms.cuda import CudaPlatform


 class FlashMLABackend(MLACommonBackend):
@@ -181,6 +182,16 @@ def __init__(
         assert is_flashmla_supported(), \
             "FlashMLA is not supported on this device"

+        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
+        # context:
+        # https://github.com/deepseek-ai/FlashMLA/issues/83
+        # https://github.com/vllm-project/vllm/issues/24513
+        if CudaPlatform.has_device_capability(100):
+            raise NotImplementedError(
+                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
+                "Please use CUTLASS_MLA or TRITON_MLA instead. "
+                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
             raise NotImplementedError(
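
For context on the guard above: vLLM encodes a device's compute capability as major * 10 + minor, so has_device_capability(100) is true on SM 10.0 (Blackwell) and newer. The sketch below reproduces the same check with plain PyTorch; the helper name is ours, not a vLLM API.

import torch

def is_blackwell_or_newer() -> bool:
    # Rough equivalent of CudaPlatform.has_device_capability(100):
    # compare major * 10 + minor against the SM 10.0 threshold.
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor >= 100

if is_blackwell_or_newer():
    print("FlashMLA unavailable here; use CUTLASS_MLA or TRITON_MLA.")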

Second file (v1/attention/backends/mla):
@@ -12,6 +12,7 @@
                                          is_flashmla_supported)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
+from vllm.platforms.cuda import CudaPlatform
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                    MLACommonDecodeMetadata,
                                                    MLACommonImpl,
@@ -158,6 +159,16 @@ def __init__(
         assert is_flashmla_supported(), \
             "FlashMLA is not supported on this device"

+        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
+        # context:
+        # https://github.com/deepseek-ai/FlashMLA/issues/83
+        # https://github.com/vllm-project/vllm/issues/24513
+        if CudaPlatform.has_device_capability(100):
+            raise NotImplementedError(
+                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
+                "Please use CUTLASS_MLA or TRITON_MLA instead. "
+                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+
         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):
             raise NotImplementedError(
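
As the error message suggests, affected users can select another MLA backend through the VLLM_ATTENTION_BACKEND environment variable before vLLM chooses a backend. A minimal usage sketch, with an illustrative model name:

import os

# Pick a Blackwell-compatible MLA backend; "TRITON_MLA" works the same way,
# per the error message above.
os.environ["VLLM_ATTENTION_BACKEND"] = "CUTLASS_MLA"

from vllm import LLM  # import after setting the env var

# Example model only; any MLA-based (DeepSeek-style) model applies.
llm = LLM(model="deepseek-ai/DeepSeek-V2-Lite")
out = llm.generate("Hello")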