|
| 1 | +## vLLM |
| 2 | +- &vllm |
| 3 | + name: "cuda11-vllm" |
| 4 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm" |
| 5 | + license: apache-2.0 |
| 6 | + urls: |
| 7 | + - https://github.com/vllm-project/vllm |
| 8 | + tags: |
| 9 | + - text-to-text |
| 10 | + - multimodal |
| 11 | + - GPTQ |
| 12 | + - AWQ |
| 13 | + - AutoRound |
| 14 | + - INT4 |
| 15 | + - INT8 |
| 16 | + - FP8 |
| 17 | + icon: https://raw.githubusercontent.com/vllm-project/vllm/main/docs/assets/logos/vllm-logo-text-dark.png |
| 18 | + description: | |
| 19 | + vLLM is a fast and easy-to-use library for LLM inference and serving. |
| 20 | + Originally developed in the Sky Computing Lab at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry. |
| 21 | + vLLM is fast with: |
| 22 | + State-of-the-art serving throughput |
| 23 | + Efficient management of attention key and value memory with PagedAttention |
| 24 | + Continuous batching of incoming requests |
| 25 | + Fast model execution with CUDA/HIP graph |
| 26 | + Quantizations: GPTQ, AWQ, AutoRound, INT4, INT8, and FP8 |
| 27 | + Optimized CUDA kernels, including integration with FlashAttention and FlashInfer |
| 28 | + Speculative decoding |
| 29 | + Chunked prefill |
| 30 | + alias: "vllm" |
| 31 | +- !!merge <<: *vllm |
| 32 | + name: "cuda12-vllm" |
| 33 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm" |
| 34 | +- !!merge <<: *vllm |
| 35 | + name: "rocm-vllm" |
| 36 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm" |
| 37 | +- !!merge <<: *vllm |
| 38 | + name: "intel-sycl-f32-vllm" |
| 39 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm" |
| 40 | +- !!merge <<: *vllm |
| 41 | + name: "intel-sycl-f16-vllm" |
| 42 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm" |
| 43 | +- !!merge <<: *vllm |
| 44 | + name: "cuda11-vllm-master" |
| 45 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm" |
| 46 | +- !!merge <<: *vllm |
| 47 | + name: "cuda12-vllm-master" |
| 48 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm" |
| 49 | +- !!merge <<: *vllm |
| 50 | + name: "rocm-vllm-master" |
| 51 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm" |
| 52 | +- !!merge <<: *vllm |
| 53 | + name: "intel-sycl-f32-vllm-master" |
| 54 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm" |
| 55 | +- !!merge <<: *vllm |
| 56 | + name: "intel-sycl-f16-vllm-master" |
| 57 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm" |
| 58 | +## Rerankers |
1 | 59 | - name: "cuda11-rerankers" |
2 | 60 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-rerankers" |
3 | 61 | alias: "cuda11-rerankers" |
4 | | - |
5 | | -- name: "cuda11-vllm" |
6 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-vllm" |
7 | | - alias: "cuda11-vllm" |
8 | | - |
9 | | -- name: "cuda11-transformers" |
10 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers" |
11 | | - alias: "cuda11-transformers" |
12 | | - |
13 | | -- name: "cuda11-diffusers" |
14 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers" |
15 | | - alias: "cuda11-diffusers" |
16 | | - |
17 | | -- name: "cuda11-exllama2" |
18 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2" |
19 | | - alias: "cuda11-exllama2" |
20 | | - |
21 | 62 | - name: "cuda12-rerankers" |
22 | 63 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-rerankers" |
23 | 64 | alias: "cuda12-rerankers" |
24 | | - |
25 | | -- name: "cuda12-vllm" |
26 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-vllm" |
27 | | - alias: "cuda12-vllm" |
28 | | - |
29 | | -- name: "cuda12-transformers" |
30 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers" |
31 | | - alias: "cuda12-transformers" |
32 | | - |
33 | | -- name: "cuda12-diffusers" |
34 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers" |
35 | | - alias: "cuda12-diffusers" |
36 | | - |
37 | | -- name: "cuda12-exllama2" |
38 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2" |
39 | | - alias: "cuda12-exllama2" |
40 | | - |
41 | | -- name: "rocm-rerankers" |
42 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers" |
43 | | - alias: "rocm-rerankers" |
44 | | - |
45 | | -- name: "rocm-vllm" |
46 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-vllm" |
47 | | - alias: "rocm-vllm" |
48 | | - |
49 | | -- name: "rocm-transformers" |
50 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers" |
51 | | - alias: "rocm-transformers" |
52 | | - |
53 | | -- name: "rocm-diffusers" |
54 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers" |
55 | | - alias: "rocm-diffusers" |
56 | | - |
57 | 65 | - name: "intel-sycl-f32-rerankers" |
58 | 66 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-rerankers" |
59 | 67 | alias: "intel-sycl-f32-rerankers" |
60 | | - |
61 | 68 | - name: "intel-sycl-f16-rerankers" |
62 | 69 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-rerankers" |
63 | 70 | alias: "intel-sycl-f16-rerankers" |
| 71 | +- name: "rocm-rerankers" |
| 72 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-rerankers" |
| 73 | + alias: "rocm-rerankers" |
| 74 | +- name: "cuda11-rerankers-master" |
| 75 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers" |
| 76 | + alias: "rerankers" |
64 | 77 |
|
65 | | -- name: "intel-sycl-f32-vllm" |
66 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-vllm" |
67 | | - alias: "intel-sycl-f32-vllm" |
| 78 | +- name: "cuda12-rerankers-master" |
| 79 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers" |
| 80 | + alias: "rerankers" |
| 81 | +- name: "rocm-rerankers-master" |
| 82 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers" |
| 83 | + alias: "rerankers" |
68 | 84 |
|
69 | | -- name: "intel-sycl-f16-vllm" |
70 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-vllm" |
71 | | - alias: "intel-sycl-f16-vllm" |
| 85 | +- name: "intel-sycl-f32-rerankers-master" |
| 86 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers" |
| 87 | + alias: "rerankers" |
72 | 88 |
|
| 89 | +- name: "intel-sycl-f16-rerankers-master" |
| 90 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers" |
| 91 | + alias: "rerankers" |
| 92 | +## Transformers |
| 93 | +- name: "cuda12-transformers" |
| 94 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-transformers" |
| 95 | + alias: "cuda12-transformers" |
| 96 | +- name: "rocm-transformers" |
| 97 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-transformers" |
| 98 | + alias: "rocm-transformers" |
73 | 99 | - name: "intel-sycl-f32-transformers" |
74 | 100 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-transformers" |
75 | 101 | alias: "intel-sycl-f32-transformers" |
76 | 102 |
|
77 | 103 | - name: "intel-sycl-f16-transformers" |
78 | 104 | uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f16-transformers" |
79 | 105 | alias: "intel-sycl-f16-transformers" |
80 | | - |
81 | | -- name: "intel-sycl-f32-diffusers" |
82 | | - uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers" |
83 | | - alias: "intel-sycl-f32-diffusers" |
84 | | - |
85 | | -- name: "cuda11-rerankers-master" |
86 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-rerankers" |
87 | | - alias: "rerankers" |
88 | | - |
89 | | -- name: "cuda11-vllm-master" |
90 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-vllm" |
91 | | - alias: "vllm" |
92 | | - |
93 | 106 | - name: "cuda11-transformers-master" |
94 | 107 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-transformers" |
95 | 108 | alias: "transformers" |
| 109 | +- name: "cuda11-transformers" |
| 110 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-transformers" |
| 111 | + alias: "cuda11-transformers" |
96 | 112 |
|
97 | | -- name: "cuda11-diffusers-master" |
98 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers" |
99 | | - alias: "diffusers" |
100 | | - |
101 | | -- name: "cuda11-exllama2-master" |
102 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2" |
103 | | - alias: "exllama2" |
104 | | - |
105 | | -- name: "cuda12-rerankers-master" |
106 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-rerankers" |
107 | | - alias: "rerankers" |
108 | | - |
109 | | -- name: "cuda12-vllm-master" |
110 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-vllm" |
111 | | - alias: "vllm" |
112 | 113 |
|
113 | 114 | - name: "cuda12-transformers-master" |
114 | 115 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-transformers" |
115 | 116 | alias: "transformers" |
116 | 117 |
|
117 | | -- name: "cuda12-diffusers-master" |
118 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers" |
119 | | - alias: "diffusers" |
120 | | - |
121 | | -- name: "cuda12-exllama2-master" |
122 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2" |
123 | | - alias: "exllama2" |
124 | | - |
125 | | -- name: "rocm-rerankers-master" |
126 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-rerankers" |
127 | | - alias: "rerankers" |
128 | | - |
129 | | -- name: "rocm-vllm-master" |
130 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-vllm" |
131 | | - alias: "vllm" |
132 | 118 |
|
133 | 119 | - name: "rocm-transformers-master" |
134 | 120 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-transformers" |
135 | 121 | alias: "transformers" |
136 | 122 |
|
137 | | -- name: "rocm-diffusers-master" |
138 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers" |
139 | | - alias: "diffusers" |
140 | | - |
141 | | -- name: "intel-sycl-f32-rerankers-master" |
142 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-rerankers" |
143 | | - alias: "rerankers" |
144 | | - |
145 | | -- name: "intel-sycl-f16-rerankers-master" |
146 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-rerankers" |
147 | | - alias: "rerankers" |
148 | | - |
149 | | -- name: "intel-sycl-f32-vllm-master" |
150 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-vllm" |
151 | | - alias: "vllm" |
152 | 123 |
|
153 | | -- name: "intel-sycl-f16-vllm-master" |
154 | | - uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-vllm" |
155 | | - alias: "vllm" |
156 | 124 |
|
157 | 125 | - name: "intel-sycl-f32-transformers-master" |
158 | 126 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-transformers" |
|
161 | 129 | - name: "intel-sycl-f16-transformers-master" |
162 | 130 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-transformers" |
163 | 131 | alias: "transformers" |
| 132 | +## Diffusers |
| 133 | +- name: "cuda12-diffusers" |
| 134 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-diffusers" |
| 135 | + alias: "cuda12-diffusers" |
| 136 | +- name: "rocm-diffusers" |
| 137 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-rocm-hipblas-diffusers" |
| 138 | + alias: "rocm-diffusers" |
| 139 | +- name: "cuda11-diffusers" |
| 140 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-diffusers" |
| 141 | + alias: "cuda11-diffusers" |
| 142 | + |
| 143 | + |
| 144 | +- name: "intel-sycl-f32-diffusers" |
| 145 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-intel-sycl-f32-diffusers" |
| 146 | + alias: "intel-sycl-f32-diffusers" |
| 147 | + |
| 148 | +- name: "cuda11-diffusers-master" |
| 149 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-diffusers" |
| 150 | + alias: "diffusers" |
| 151 | + |
| 152 | +- name: "cuda12-diffusers-master" |
| 153 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-diffusers" |
| 154 | + alias: "diffusers" |
| 155 | + |
| 156 | +- name: "rocm-diffusers-master" |
| 157 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-rocm-hipblas-diffusers" |
| 158 | + alias: "diffusers" |
164 | 159 |
|
165 | 160 | - name: "intel-sycl-f32-diffusers-master" |
166 | 161 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-diffusers" |
167 | 162 | alias: "diffusers" |
168 | 163 |
|
| 164 | +## exllama2 |
| 165 | +- name: "cuda11-exllama2" |
| 166 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-11-exllama2" |
| 167 | + alias: "cuda11-exllama2" |
| 168 | +- name: "cuda12-exllama2" |
| 169 | + uri: "quay.io/go-skynet/local-ai-backends:latest-gpu-nvidia-cuda-12-exllama2" |
| 170 | + alias: "cuda12-exllama2" |
| 171 | + |
| 172 | +- name: "cuda11-exllama2-master" |
| 173 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-exllama2" |
| 174 | + alias: "exllama2" |
| 175 | + |
| 176 | + |
| 177 | +- name: "cuda12-exllama2-master" |
| 178 | + uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-12-exllama2" |
| 179 | + alias: "exllama2" |
| 180 | + |
| 181 | +## kokoro |
169 | 182 | - name: "cuda11-kokoro-master" |
170 | 183 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-kokoro" |
171 | 184 | alias: "kokoro" |
|
194 | 207 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f32-kokoro" |
195 | 208 | alias: "kokoro" |
196 | 209 |
|
| 210 | +## faster-whisper |
197 | 211 | - name: "cuda11-faster-whisper-master" |
198 | 212 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-faster-whisper" |
199 | 213 | alias: "faster-whisper" |
|
222 | 236 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-faster-whisper" |
223 | 237 | alias: "faster-whisper" |
224 | 238 |
|
| 239 | +## coqui |
| 240 | + |
225 | 241 | - name: "cuda11-coqui-master" |
226 | 242 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-coqui" |
227 | 243 | alias: "coqui" |
|
250 | 266 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-coqui" |
251 | 267 | alias: "coqui" |
252 | 268 |
|
| 269 | +## bark |
253 | 270 | - name: "cuda11-bark-master" |
254 | 271 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-bark" |
255 | 272 | alias: "bark" |
|
278 | 295 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-intel-sycl-f16-bark" |
279 | 296 | alias: "bark" |
280 | 297 |
|
| 298 | +## chatterbox |
| 299 | + |
281 | 300 | - name: "cuda11-chatterbox-master" |
282 | 301 | uri: "quay.io/go-skynet/local-ai-backends:master-gpu-nvidia-cuda-11-chatterbox" |
283 | 302 | alias: "chatterbox" |
|
0 commit comments