@@ -6,7 +6,7 @@

 from tests.kernels.utils import opcheck
 from vllm import _custom_ops as ops
-from vllm.utils import get_max_shared_memory_bytes, is_hip
+from vllm.utils import get_max_shared_memory_bytes, is_hip, seed_everything

 from .allclose_default import get_default_atol, get_default_rtol

@@ -139,10 +139,8 @@ def test_paged_attention(
 ) -> None:
     if kv_cache_dtype == "fp8" and head_size % 16:
         pytest.skip()
-    random.seed(seed)
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+
+    seed_everything(seed)
     torch.set_default_device(device)
     scale = float(1.0 / (head_size**0.5))
     num_query_heads, num_kv_heads = num_heads
@@ -354,10 +352,7 @@ def test_paged_attention_rocm(
     seed: int,
     device: str,
 ) -> None:
-    random.seed(seed)
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    seed_everything(seed)
     torch.set_default_device(device)
     scale = float(1.0 / (head_size**0.5))
     num_query_heads, num_kv_heads = num_heads
@@ -506,10 +501,7 @@ def test_multi_query_kv_attention(
     seed: int,
     device: str,
 ) -> None:
-    random.seed(seed)
-    torch.random.manual_seed(seed)
-    if torch.cuda.is_available():
-        torch.cuda.manual_seed(seed)
+    seed_everything(seed)
     torch.set_default_device(device)
     # MAX_SEQ_LEN sometimes causes OOM in the reference implementation.
     # As the xformers library is already tested with its own tests, we can use
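
Each hunk above swaps the same four lines of seeding boilerplate for a single seed_everything(seed) call. A minimal sketch of what such a helper does, mirroring only the calls this diff removes (the actual vllm.utils.seed_everything may differ, e.g. by also seeding NumPy or seeding all CUDA devices):

    import random

    import torch


    def seed_everything(seed: int) -> None:
        # Seed Python's stdlib RNG.
        random.seed(seed)
        # Seed the CPU-side torch RNG.
        torch.random.manual_seed(seed)
        # Seed the CUDA RNG when a GPU is present.
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed)

Centralizing seeding in one helper keeps every test deterministic in the same way and avoids drift if another RNG source is added later.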