Skip to content
This repository was archived by the owner on Oct 11, 2024. It is now read-only.

Commit 14a7620

Browse files
khluu authored and Robert Shaw committed
[ci] Setup Release pipeline and build release wheels with cache (vllm-project#5610)
Signed-off-by: kevin <[email protected]>
1 parent 0008715 commit 14a7620

File tree

2 files changed

+46
-5
lines changed

2 files changed

+46
-5
lines changed

.buildkite/release-pipeline.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
# Release pipeline: builds one wheel per (Python, CUDA) combination in the
# matrix below and uploads the results to S3 under the commit being built.
steps:
  # Manual gate placed in front of the wheel builds.
  - block: "Build wheels"

  - label: "Build wheel - Python {{matrix.python_version}}, CUDA {{matrix.cuda_version}}"
    agents:
      queue: cpu_queue
    commands:
      # Build only the `build` stage of the Dockerfile for the selected versions.
      - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg CUDA_VERSION={{matrix.cuda_version}} --build-arg PYTHON_VERSION={{matrix.python_version}} --tag vllm-ci:build-image --target build --progress plain ."
      # -p: do not fail when the directory is left over from an earlier run
      # on the same agent.
      - "mkdir -p artifacts"
      # Copy the image's dist/ directory out to the host through a bind mount.
      - "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image cp -r dist /artifacts_host"
      # Upload the copied wheels, keyed by commit SHA.
      - "aws s3 cp --recursive artifacts/dist s3://vllm-wheels/$BUILDKITE_COMMIT/"
    matrix:
      setup:
        cuda_version:
          - "11.8.0"
          - "12.1.0"
        python_version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"

Dockerfile

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,26 @@
55
# docs/source/dev/dockerfile/dockerfile.rst and
66
# docs/source/assets/dev/dockerfile-stages-dependency.png
77

# Global build argument, declared ahead of the first FROM so that it can be
# substituted into the nvidia/cuda base-image tags of the stages below.
# Override with `--build-arg CUDA_VERSION=<x.y.z>`.
ARG CUDA_VERSION=12.4.1
89
#################### BASE BUILD IMAGE ####################
910
# prepare basic build environment
10-
FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
# Base build stage: CUDA "devel" image for the requested CUDA_VERSION.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 AS base

# An ARG declared before the first FROM is only visible to FROM lines, so
# CUDA_VERSION is declared again here for use by instructions in this stage.
ARG CUDA_VERSION=12.4.1
# "3" installs the distribution's default python3 packages; a value such as
# "3.9" installs that specific interpreter and makes it the target of
# /usr/bin/python3 (see the update-alternatives call below).
ARG PYTHON_VERSION=3

# Keep apt/debconf from prompting during package installation.
ENV DEBIAN_FRONTEND=noninteractive

# Preseed the tzdata area/zone answers, add the deadsnakes PPA so that Python
# versions other than the distribution default can be installed, install the
# requested interpreter, and finally print the python3/pip versions so a
# broken interpreter setup fails the build at this step.
# `add-apt-repository -y` answers its confirmation prompt automatically so the
# step never waits for input.
RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
    && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections \
    && apt-get update -y \
    && apt-get install -y ccache software-properties-common \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update -y \
    && apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv python3-pip \
    && if [ "${PYTHON_VERSION}" != "3" ]; then update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1; fi \
    && python3 --version \
    && python3 -m pip --version

# Additional tools installed in the base image.
RUN apt-get update -y \
    && apt-get install -y python3-pip git curl sudo
@@ -16,7 +33,7 @@ RUN apt-get update -y \
1633
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
1734
# this won't be needed for future versions of this docker image
1835
# or future versions of triton.
19-
RUN ldconfig /usr/local/cuda-12.4/compat/
# Run ldconfig on the CUDA compat/ directory. The directory name carries only
# the major.minor part of CUDA_VERSION (e.g. 12.4.1 -> cuda-12.4), so the
# first two dot-separated fields are extracted before building the path.
RUN CUDA_MAJOR_MINOR=$(echo $CUDA_VERSION | cut -d. -f1,2) \
    && ldconfig /usr/local/cuda-${CUDA_MAJOR_MINOR}/compat/

WORKDIR /workspace
2239

@@ -36,7 +53,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
3653
#################### BASE BUILD IMAGE ####################
3754

3855
#################### WHEEL BUILD IMAGE ####################
39-
FROM dev AS build
# Wheel build stage, layered on top of the `base` stage.
FROM base AS build

# ARG values do not carry over from the parent stage, so PYTHON_VERSION is
# declared again here.
# NOTE(review): its use within this stage is outside the visible hunk -- confirm.
ARG PYTHON_VERSION=3

# Compiler cache, used to speed up compilation through local or remote caching.
RUN apt-get update -y \
    && apt-get install -y ccache
@@ -59,7 +78,8 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
5978

6079
#################### vLLM installation IMAGE ####################
6180
# image with vLLM installed
62-
FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base
# Runtime stage: CUDA "base" image for the same CUDA_VERSION as the build stages.
FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu22.04 AS vllm-base

# Declared again inside the stage so that RUN instructions here can read it.
ARG CUDA_VERSION=12.4.1

WORKDIR /vllm-workspace
6484

6585
RUN apt-get update -y && \
@@ -69,7 +89,7 @@ RUN apt-get update -y && \
6989
# https://github.com/pytorch/pytorch/issues/107960 -- hopefully
7090
# this won't be needed for future versions of this docker image
7191
# or future versions of triton.
72-
RUN ldconfig /usr/local/cuda-12.4/compat/
# Run ldconfig on the CUDA compat/ directory; the path uses only the
# major.minor part of CUDA_VERSION (e.g. 12.4.1 -> cuda-12.4).
RUN CUDA_MAJOR_MINOR=$(echo $CUDA_VERSION | cut -d. -f1,2) \
    && ldconfig /usr/local/cuda-${CUDA_MAJOR_MINOR}/compat/

# The nm-vllm wheel is installed first so that torch etc. get installed with it.
# NOTE(review): how build_type is consumed lies beyond the visible hunk -- confirm.
ARG build_type="NIGHTLY"

0 commit comments

Comments
 (0)