1 file changed: +5 -1 lines changed
python/sglang/srt/layers/attention
@@ -236,7 +236,11 @@ def make_local_attention_virtual_batches(
236236 np .arange (pages_per_local_batch , dtype = np .int32 ),
237237 (virtual_batches , pages_per_local_batch ),
238238 ) + np .expand_dims (block_starts , axis = 1 )
239- block_indices = block_indices .flatten ()
239+ # Clamp block_indices to the last valid column of block_table.
240+ # This prevents index-out-of-bounds errors when dealing with large
241+ # sequences (>8192 tokens) or when block_table has fewer columns than
242+ # would be needed for the full attention chunk size.
243+ block_indices = block_indices .flatten ().clip (max = block_table .shape [1 ] - 1 )
240244 batch_indices = np .repeat (
241245 np .arange (actual_batch_size , dtype = np .int32 ),
242246 local_blocks * pages_per_local_batch ,
You can’t perform that action at this time.
0 commit comments