Skip to content

Commit de23377

Browse files
Merge pull request vllm-project#5 from wenxcs/fit-cluster-tests
Faster v2 hopper fused moe kernel configs
2 parents 23be767 + 4e6c35a commit de23377

7 files changed

+659
-127
lines changed

config.json

Lines changed: 0 additions & 36 deletions
This file was deleted.
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
{
2+
"3328": {
3+
"BLOCK_SIZE_M": 64,
4+
"BLOCK_SIZE_N": 256,
5+
"BLOCK_SIZE_K": 64,
6+
"GROUP_SIZE_M": 16,
7+
"num_warps": 4,
8+
"num_stages": 2
9+
},
10+
"1024": {
11+
"BLOCK_SIZE_M": 64,
12+
"BLOCK_SIZE_N": 256,
13+
"BLOCK_SIZE_K": 32,
14+
"GROUP_SIZE_M": 32,
15+
"num_warps": 4,
16+
"num_stages": 4
17+
},
18+
"3072": {
19+
"BLOCK_SIZE_M": 64,
20+
"BLOCK_SIZE_N": 256,
21+
"BLOCK_SIZE_K": 64,
22+
"GROUP_SIZE_M": 32,
23+
"num_warps": 4,
24+
"num_stages": 2
25+
},
26+
"256": {
27+
"BLOCK_SIZE_M": 32,
28+
"BLOCK_SIZE_N": 256,
29+
"BLOCK_SIZE_K": 128,
30+
"GROUP_SIZE_M": 8,
31+
"num_warps": 4,
32+
"num_stages": 4
33+
},
34+
"768": {
35+
"BLOCK_SIZE_M": 128,
36+
"BLOCK_SIZE_N": 128,
37+
"BLOCK_SIZE_K": 64,
38+
"GROUP_SIZE_M": 8,
39+
"num_warps": 4,
40+
"num_stages": 4
41+
},
42+
"1792": {
43+
"BLOCK_SIZE_M": 128,
44+
"BLOCK_SIZE_N": 128,
45+
"BLOCK_SIZE_K": 64,
46+
"GROUP_SIZE_M": 16,
47+
"num_warps": 4,
48+
"num_stages": 4
49+
},
50+
"2560": {
51+
"BLOCK_SIZE_M": 64,
52+
"BLOCK_SIZE_N": 256,
53+
"BLOCK_SIZE_K": 64,
54+
"GROUP_SIZE_M": 32,
55+
"num_warps": 4,
56+
"num_stages": 2
57+
},
58+
"2816": {
59+
"BLOCK_SIZE_M": 128,
60+
"BLOCK_SIZE_N": 128,
61+
"BLOCK_SIZE_K": 64,
62+
"GROUP_SIZE_M": 16,
63+
"num_warps": 4,
64+
"num_stages": 4
65+
},
66+
"3584": {
67+
"BLOCK_SIZE_M": 64,
68+
"BLOCK_SIZE_N": 256,
69+
"BLOCK_SIZE_K": 64,
70+
"GROUP_SIZE_M": 32,
71+
"num_warps": 4,
72+
"num_stages": 2
73+
},
74+
"1536": {
75+
"BLOCK_SIZE_M": 64,
76+
"BLOCK_SIZE_N": 256,
77+
"BLOCK_SIZE_K": 64,
78+
"GROUP_SIZE_M": 64,
79+
"num_warps": 4,
80+
"num_stages": 2
81+
},
82+
"2048": {
83+
"BLOCK_SIZE_M": 64,
84+
"BLOCK_SIZE_N": 256,
85+
"BLOCK_SIZE_K": 64,
86+
"GROUP_SIZE_M": 64,
87+
"num_warps": 4,
88+
"num_stages": 2
89+
},
90+
"512": {
91+
"BLOCK_SIZE_M": 64,
92+
"BLOCK_SIZE_N": 256,
93+
"BLOCK_SIZE_K": 64,
94+
"GROUP_SIZE_M": 8,
95+
"num_warps": 4,
96+
"num_stages": 4
97+
},
98+
"3840": {
99+
"BLOCK_SIZE_M": 128,
100+
"BLOCK_SIZE_N": 128,
101+
"BLOCK_SIZE_K": 64,
102+
"GROUP_SIZE_M": 16,
103+
"num_warps": 4,
104+
"num_stages": 4
105+
},
106+
"1280": {
107+
"BLOCK_SIZE_M": 64,
108+
"BLOCK_SIZE_N": 256,
109+
"BLOCK_SIZE_K": 64,
110+
"GROUP_SIZE_M": 64,
111+
"num_warps": 4,
112+
"num_stages": 2
113+
},
114+
"2304": {
115+
"BLOCK_SIZE_M": 64,
116+
"BLOCK_SIZE_N": 256,
117+
"BLOCK_SIZE_K": 64,
118+
"GROUP_SIZE_M": 32,
119+
"num_warps": 4,
120+
"num_stages": 2
121+
},
122+
"4096": {
123+
"BLOCK_SIZE_M": 64,
124+
"BLOCK_SIZE_N": 256,
125+
"BLOCK_SIZE_K": 64,
126+
"GROUP_SIZE_M": 32,
127+
"num_warps": 4,
128+
"num_stages": 2
129+
}
130+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
{
2+
"2048": {
3+
"BLOCK_SIZE_M": 128,
4+
"BLOCK_SIZE_N": 256,
5+
"BLOCK_SIZE_K": 32,
6+
"GROUP_SIZE_M": 32,
7+
"num_warps": 8,
8+
"num_stages": 4
9+
},
10+
"1536": {
11+
"BLOCK_SIZE_M": 128,
12+
"BLOCK_SIZE_N": 256,
13+
"BLOCK_SIZE_K": 32,
14+
"GROUP_SIZE_M": 8,
15+
"num_warps": 8,
16+
"num_stages": 4
17+
},
18+
"3072": {
19+
"BLOCK_SIZE_M": 128,
20+
"BLOCK_SIZE_N": 256,
21+
"BLOCK_SIZE_K": 32,
22+
"GROUP_SIZE_M": 32,
23+
"num_warps": 8,
24+
"num_stages": 4
25+
},
26+
"1024": {
27+
"BLOCK_SIZE_M": 64,
28+
"BLOCK_SIZE_N": 256,
29+
"BLOCK_SIZE_K": 64,
30+
"GROUP_SIZE_M": 8,
31+
"num_warps": 8,
32+
"num_stages": 4
33+
},
34+
"512": {
35+
"BLOCK_SIZE_M": 128,
36+
"BLOCK_SIZE_N": 128,
37+
"BLOCK_SIZE_K": 64,
38+
"GROUP_SIZE_M": 1,
39+
"num_warps": 8,
40+
"num_stages": 4
41+
},
42+
"2560": {
43+
"BLOCK_SIZE_M": 128,
44+
"BLOCK_SIZE_N": 256,
45+
"BLOCK_SIZE_K": 32,
46+
"GROUP_SIZE_M": 32,
47+
"num_warps": 8,
48+
"num_stages": 4
49+
},
50+
"2304": {
51+
"BLOCK_SIZE_M": 128,
52+
"BLOCK_SIZE_N": 256,
53+
"BLOCK_SIZE_K": 32,
54+
"GROUP_SIZE_M": 32,
55+
"num_warps": 8,
56+
"num_stages": 4
57+
},
58+
"2816": {
59+
"BLOCK_SIZE_M": 128,
60+
"BLOCK_SIZE_N": 256,
61+
"BLOCK_SIZE_K": 32,
62+
"GROUP_SIZE_M": 32,
63+
"num_warps": 8,
64+
"num_stages": 4
65+
},
66+
"768": {
67+
"BLOCK_SIZE_M": 128,
68+
"BLOCK_SIZE_N": 128,
69+
"BLOCK_SIZE_K": 64,
70+
"GROUP_SIZE_M": 1,
71+
"num_warps": 8,
72+
"num_stages": 4
73+
},
74+
"1280": {
75+
"BLOCK_SIZE_M": 64,
76+
"BLOCK_SIZE_N": 256,
77+
"BLOCK_SIZE_K": 64,
78+
"GROUP_SIZE_M": 16,
79+
"num_warps": 8,
80+
"num_stages": 4
81+
},
82+
"1792": {
83+
"BLOCK_SIZE_M": 128,
84+
"BLOCK_SIZE_N": 256,
85+
"BLOCK_SIZE_K": 32,
86+
"GROUP_SIZE_M": 32,
87+
"num_warps": 8,
88+
"num_stages": 4
89+
},
90+
"256": {
91+
"BLOCK_SIZE_M": 64,
92+
"BLOCK_SIZE_N": 128,
93+
"BLOCK_SIZE_K": 128,
94+
"GROUP_SIZE_M": 1,
95+
"num_warps": 8,
96+
"num_stages": 4
97+
},
98+
"3328": {
99+
"BLOCK_SIZE_M": 64,
100+
"BLOCK_SIZE_N": 256,
101+
"BLOCK_SIZE_K": 32,
102+
"GROUP_SIZE_M": 16,
103+
"num_warps": 4,
104+
"num_stages": 4
105+
}
106+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
{
2+
"2048": {
3+
"BLOCK_SIZE_M": 128,
4+
"BLOCK_SIZE_N": 256,
5+
"BLOCK_SIZE_K": 32,
6+
"GROUP_SIZE_M": 32,
7+
"num_warps": 8,
8+
"num_stages": 4
9+
},
10+
"1536": {
11+
"BLOCK_SIZE_M": 128,
12+
"BLOCK_SIZE_N": 256,
13+
"BLOCK_SIZE_K": 32,
14+
"GROUP_SIZE_M": 8,
15+
"num_warps": 8,
16+
"num_stages": 4
17+
},
18+
"3072": {
19+
"BLOCK_SIZE_M": 128,
20+
"BLOCK_SIZE_N": 256,
21+
"BLOCK_SIZE_K": 32,
22+
"GROUP_SIZE_M": 32,
23+
"num_warps": 8,
24+
"num_stages": 4
25+
},
26+
"1024": {
27+
"BLOCK_SIZE_M": 64,
28+
"BLOCK_SIZE_N": 256,
29+
"BLOCK_SIZE_K": 64,
30+
"GROUP_SIZE_M": 8,
31+
"num_warps": 8,
32+
"num_stages": 4
33+
},
34+
"512": {
35+
"BLOCK_SIZE_M": 128,
36+
"BLOCK_SIZE_N": 128,
37+
"BLOCK_SIZE_K": 64,
38+
"GROUP_SIZE_M": 1,
39+
"num_warps": 8,
40+
"num_stages": 4
41+
},
42+
"2560": {
43+
"BLOCK_SIZE_M": 128,
44+
"BLOCK_SIZE_N": 256,
45+
"BLOCK_SIZE_K": 32,
46+
"GROUP_SIZE_M": 32,
47+
"num_warps": 8,
48+
"num_stages": 4
49+
},
50+
"2304": {
51+
"BLOCK_SIZE_M": 128,
52+
"BLOCK_SIZE_N": 256,
53+
"BLOCK_SIZE_K": 32,
54+
"GROUP_SIZE_M": 32,
55+
"num_warps": 8,
56+
"num_stages": 4
57+
},
58+
"2816": {
59+
"BLOCK_SIZE_M": 128,
60+
"BLOCK_SIZE_N": 256,
61+
"BLOCK_SIZE_K": 32,
62+
"GROUP_SIZE_M": 32,
63+
"num_warps": 8,
64+
"num_stages": 4
65+
},
66+
"768": {
67+
"BLOCK_SIZE_M": 128,
68+
"BLOCK_SIZE_N": 128,
69+
"BLOCK_SIZE_K": 64,
70+
"GROUP_SIZE_M": 1,
71+
"num_warps": 8,
72+
"num_stages": 4
73+
},
74+
"1280": {
75+
"BLOCK_SIZE_M": 64,
76+
"BLOCK_SIZE_N": 256,
77+
"BLOCK_SIZE_K": 64,
78+
"GROUP_SIZE_M": 16,
79+
"num_warps": 8,
80+
"num_stages": 4
81+
},
82+
"1792": {
83+
"BLOCK_SIZE_M": 128,
84+
"BLOCK_SIZE_N": 256,
85+
"BLOCK_SIZE_K": 32,
86+
"GROUP_SIZE_M": 32,
87+
"num_warps": 8,
88+
"num_stages": 4
89+
},
90+
"256": {
91+
"BLOCK_SIZE_M": 64,
92+
"BLOCK_SIZE_N": 128,
93+
"BLOCK_SIZE_K": 128,
94+
"GROUP_SIZE_M": 1,
95+
"num_warps": 8,
96+
"num_stages": 4
97+
},
98+
"3328": {
99+
"BLOCK_SIZE_M": 64,
100+
"BLOCK_SIZE_N": 256,
101+
"BLOCK_SIZE_K": 32,
102+
"GROUP_SIZE_M": 16,
103+
"num_warps": 4,
104+
"num_stages": 4
105+
}
106+
}

0 commit comments

Comments
 (0)