From 7134ed94fe3f54af89207ceb16019f7d5997553f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 12:51:33 -0500 Subject: [PATCH 01/26] add simple windows runner --- .github/workflows/build-and-test.yml | 5 +++++ .github/workflows/test-wheel.yml | 1 + 2 files changed, 6 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index feafb0c87..ed2b6a77c 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -238,6 +238,11 @@ jobs: cuda-version: "12.8.0" local-ctk: 1 runner: H100 + - host-platform: win-64 + python-version: "3.12" + cuda-version: ["12.8.0", "11.8.0"] + local-ctk: [0, 1] + runner: default name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) if: ${{ github.repository_owner == 'nvidia' }} permissions: diff --git a/.github/workflows/test-wheel.yml b/.github/workflows/test-wheel.yml index c15de07e5..f782ccd61 100644 --- a/.github/workflows/test-wheel.yml +++ b/.github/workflows/test-wheel.yml @@ -28,6 +28,7 @@ jobs: if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.runner == 'default' && inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || + (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') || (inputs.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1') }} # Our self-hosted runners require a container # TODO: use a different (nvidia?) container From a7d188e9b682628e2544944a27ae8a4b3a5704d1 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 13:15:43 -0500 Subject: [PATCH 02/26] try this --- .github/workflows/build-and-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index ed2b6a77c..488cdbf26 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -240,8 +240,8 @@ jobs: runner: H100 - host-platform: win-64 python-version: "3.12" - cuda-version: ["12.8.0", "11.8.0"] - local-ctk: [0, 1] + cuda-version: "12.8.0" + local-ctk: 0 runner: default name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) if: ${{ github.repository_owner == 'nvidia' }} From f87819254ab41502721da34c569ed574dadceb11 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 13:16:58 -0500 Subject: [PATCH 03/26] shrik build matrix for now --- .github/workflows/build-and-test.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 488cdbf26..25d3570b6 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -20,15 +20,15 @@ jobs: fail-fast: false matrix: host-platform: - - linux-64 - - linux-aarch64 +# - linux-64 +# - linux-aarch64 - win-64 python-version: - - "3.13" +# - "3.13" - "3.12" - - "3.11" - - "3.10" - - "3.9" +# - "3.11" +# - "3.10" +# - "3.9" cuda-version: # Note: this is for build-time only. - "12.8.0" From 79d06511577e627dc38b777a1319eba0d9f2273c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 16:23:15 -0500 Subject: [PATCH 04/26] duplicate workflow for windows --- .github/workflows/build-and-test.yml | 58 ++++- .../{test-wheel.yml => test-wheel-linux.yml} | 0 .github/workflows/test-wheel-windows.yml | 226 ++++++++++++++++++ 3 files changed, 274 insertions(+), 10 deletions(-) rename .github/workflows/{test-wheel.yml => test-wheel-linux.yml} (100%) create mode 100644 .github/workflows/test-wheel-windows.yml diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 25d3570b6..51a1555c8 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -202,16 +202,14 @@ jobs: run: | echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT - test: + test-linux: strategy: fail-fast: false # TODO: add driver version here matrix: host-platform: - linux-64 - - linux-aarch64 - # TODO: enable testing once win-64 GPU runners are up - # - win-64 +# - linux-aarch64 python-version: - "3.13" - "3.12" @@ -238,11 +236,50 @@ jobs: cuda-version: "12.8.0" local-ctk: 1 runner: H100 - - host-platform: win-64 - python-version: "3.12" - cuda-version: "12.8.0" + name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) + if: ${{ github.repository_owner == 'nvidia' }} + permissions: + contents: read # This is required for actions/checkout + needs: + - build + secrets: inherit + uses: + ./.github/workflows/test-wheel-linux.yml + with: + host-platform: ${{ matrix.host-platform }} + python-version: ${{ matrix.python-version }} + build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }} + cuda-version: ${{ matrix.cuda-version }} + local-ctk: ${{ matrix.local-ctk}} + runner: ${{ matrix.runner }} + + test-windows: + strategy: + fail-fast: false + # TODO: add driver version here + matrix: + host-platform: + - win-64 + python-version: +# - "3.13" + - "3.12" +# - "3.11" +# - "3.10" +# - "3.9" + cuda-version: + # Note: this is for test-time only. + - "12.8.0" +# - "12.0.1" + - "11.8.0" + local-ctk: + - 1 # use mini CTK + - 0 # use CTK wheels + runner: + - default + exclude: + # To test this combo would require nontrivial installation steps. + - cuda-version: "12.0.1" local-ctk: 0 - runner: default name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) if: ${{ github.repository_owner == 'nvidia' }} permissions: @@ -251,7 +288,7 @@ jobs: - build secrets: inherit uses: - ./.github/workflows/test-wheel.yml + ./.github/workflows/test-wheel-windows.yml with: host-platform: ${{ matrix.host-platform }} python-version: ${{ matrix.python-version }} @@ -282,7 +319,8 @@ jobs: checks: read needs: - build - - test + - test-linux + - test-windows - doc secrets: inherit uses: diff --git a/.github/workflows/test-wheel.yml b/.github/workflows/test-wheel-linux.yml similarity index 100% rename from .github/workflows/test-wheel.yml rename to .github/workflows/test-wheel-linux.yml diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml new file mode 100644 index 000000000..600f92960 --- /dev/null +++ b/.github/workflows/test-wheel-windows.yml @@ -0,0 +1,226 @@ +name: "CI: Test wheels" + +on: + workflow_call: + inputs: + host-platform: + type: string + required: true + python-version: + type: string + required: true + build-ctk-ver: + type: string + required: true + cuda-version: + type: string + required: true + local-ctk: + type: string + required: true + runner: + type: string + required: true + +jobs: + test: + # The build stage could fail but we want the CI to keep moving. + if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} + runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || + (inputs.runner == 'default' && inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || + (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') || + (inputs.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1') }} + defaults: + run: + shell: bash --noprofile --norc -xeuo pipefail {0} + steps: + - name: Ensure GPU is working + run: nvidia-smi + + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set environment variables + run: | + PYTHON_VERSION_FORMATTED=$(echo '${{ inputs.python-version }}' | tr -d '.') + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + REPO_DIR=$(pwd) + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + PWD=$(pwd) + REPO_DIR=$(cygpath -w $PWD) + fi + + BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.build-ctk-ver }})" + TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" + if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then + SKIP_CUDA_BINDINGS_TEST=1 + else + SKIP_CUDA_BINDINGS_TEST=0 + fi + + # make outputs from the previous job as env vars + CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" + echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV + echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV + echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV + CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}" + echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV + echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV + echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + + - name: Install dependencies + uses: ./.github/actions/install_unix_deps + continue-on-error: false + with: + # gcc for Cython tests, jq/wget for artifact fetching + dependencies: "build-essential jq wget" + dependent_exes: "gcc jq wget" + + - name: Download cuda-python build artifacts + if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} + uses: actions/download-artifact@v4 + with: + name: cuda-python-wheel + path: . + + - name: Download cuda.bindings build artifacts + if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} + path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + + - name: Download cuda-python & cuda.bindings build artifacts from the prior branch + if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # See https://github.com/cli/cli/blob/trunk/docs/install_linux.md#debian-ubuntu-linux-raspberry-pi-os-apt. + # gh is needed for artifact fetching. + mkdir -p -m 755 /etc/apt/keyrings \ + && out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + && cat $out | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ + && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ + && apt update \ + && apt install gh -y + + OLD_BRANCH=$(cat .github/BACKPORT_BRANCH) + OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*" + LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId') + if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then + echo "LATEST_PRIOR_RUN_ID not found!" + exit 1 + fi + + gh run download $LATEST_PRIOR_RUN_ID -p ${OLD_BASENAME} -R NVIDIA/cuda-python + ls -al $OLD_BASENAME + mkdir -p "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" + mv $OLD_BASENAME/*.whl "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"/ + rmdir $OLD_BASENAME + + gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python + ls -al cuda-python-wheel + mv cuda-python-wheel/*.whl . + rmdir cuda-python-wheel + + - name: Display structure of downloaded cuda-python artifacts + run: | + pwd + ls -lahR . + + - name: Display structure of downloaded cuda.bindings artifacts + run: | + pwd + ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR + + - name: Download cuda.core build artifacts + uses: actions/download-artifact@v4 + with: + name: ${{ env.CUDA_CORE_ARTIFACT_NAME }} + path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }} + + - name: Display structure of downloaded cuda.core build artifacts + run: | + pwd + ls -lahR $CUDA_CORE_ARTIFACTS_DIR + + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + env: + # we use self-hosted runners on which setup-python behaves weirdly... + AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" + + - name: Set up mini CTK + if: ${{ inputs.local-ctk == '1' }} + uses: ./.github/actions/fetch_ctk + continue-on-error: false + with: + host-platform: ${{ inputs.host-platform }} + cuda-version: ${{ inputs.cuda-version }} + + - name: Run cuda.bindings tests + if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} + run: | + pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" + if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + ls $CUDA_PATH + pip install *.whl + else + pip install $(ls *.whl)[all] + fi + popd + + pushd ./cuda_bindings + pip install -r requirements.txt + pytest -rxXs tests/ + + # It is a bit convoluted to run the Cython tests against CTK wheels, + # so let's just skip them. + if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + if [[ "${{ inputs.host-platform }}" == linux* ]]; then + bash tests/cython/build_tests.sh + elif [[ "${{ inputs.host-platform }}" == win* ]]; then + # TODO: enable this once win-64 runners are up + exit 1 + fi + pytest -rxXs tests/cython + popd + fi + + - name: Run cuda.core tests + run: | + # If build/test majors match: cuda.bindings is installed in the previous step. + # If mismatch: cuda.bindings is installed from the backport branch. + if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then + pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" + if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + pip install *.whl + else + pip install $(ls *.whl)[all] + fi + popd + fi + TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" + pushd "${CUDA_CORE_ARTIFACTS_DIR}" + pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] + popd + + pushd ./cuda_core + pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" + pytest -rxXs tests/ + popd + + - name: Ensure cuda-python installable + run: | + if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + pip install cuda_python*.whl + else + pip install $(ls cuda_python*.whl)[all] + fi From 3b9b6318314bb3c87dc632a6d54dc99a801a2f11 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 16:32:14 -0500 Subject: [PATCH 05/26] fix conflicts --- .github/workflows/test-wheel-linux.yml | 1 - .github/workflows/test-wheel-windows.yml | 5 +---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/test-wheel-linux.yml b/.github/workflows/test-wheel-linux.yml index f782ccd61..c15de07e5 100644 --- a/.github/workflows/test-wheel-linux.yml +++ b/.github/workflows/test-wheel-linux.yml @@ -28,7 +28,6 @@ jobs: if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || (inputs.runner == 'default' && inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || - (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') || (inputs.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1') }} # Our self-hosted runners require a container # TODO: use a different (nvidia?) container diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 600f92960..5efef5193 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -26,10 +26,7 @@ jobs: test: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} - runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'linux-64' && 'linux-amd64-gpu-v100-latest-1') || - (inputs.runner == 'default' && inputs.host-platform == 'linux-aarch64' && 'linux-arm64-gpu-a100-latest-1') || - (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') || - (inputs.runner == 'H100' && 'linux-amd64-gpu-h100-latest-1') }} + runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} defaults: run: shell: bash --noprofile --norc -xeuo pipefail {0} From b96c246e5056768315ef72031f936acb0bced3ca Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Tue, 11 Feb 2025 17:42:24 -0500 Subject: [PATCH 06/26] try to use default (power) shell on windows runner --- .github/workflows/test-wheel-windows.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 5efef5193..de878e93c 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -27,9 +27,6 @@ jobs: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} - defaults: - run: - shell: bash --noprofile --norc -xeuo pipefail {0} steps: - name: Ensure GPU is working run: nvidia-smi @@ -69,14 +66,6 @@ jobs: echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV - - name: Install dependencies - uses: ./.github/actions/install_unix_deps - continue-on-error: false - with: - # gcc for Cython tests, jq/wget for artifact fetching - dependencies: "build-essential jq wget" - dependent_exes: "gcc jq wget" - - name: Download cuda-python build artifacts if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} uses: actions/download-artifact@v4 From 1d3aaf76f064c75a88f97fbb5d7fb69246436e34 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 09:47:55 -0500 Subject: [PATCH 07/26] use bash shell too --- .github/workflows/build-and-test.yml | 106 +++++++++++------------ .github/workflows/test-wheel-windows.yml | 4 + 2 files changed, 57 insertions(+), 53 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 51a1555c8..b95feb267 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -202,56 +202,56 @@ jobs: run: | echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT - test-linux: - strategy: - fail-fast: false - # TODO: add driver version here - matrix: - host-platform: - - linux-64 -# - linux-aarch64 - python-version: - - "3.13" - - "3.12" - - "3.11" - - "3.10" - - "3.9" - cuda-version: - # Note: this is for test-time only. - - "12.8.0" - - "12.0.1" - - "11.8.0" - local-ctk: - - 1 # use mini CTK - - 0 # use CTK wheels - runner: - - default - exclude: - # To test this combo would require nontrivial installation steps. - - cuda-version: "12.0.1" - local-ctk: 0 - include: - - host-platform: linux-64 - python-version: "3.12" - cuda-version: "12.8.0" - local-ctk: 1 - runner: H100 - name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) - if: ${{ github.repository_owner == 'nvidia' }} - permissions: - contents: read # This is required for actions/checkout - needs: - - build - secrets: inherit - uses: - ./.github/workflows/test-wheel-linux.yml - with: - host-platform: ${{ matrix.host-platform }} - python-version: ${{ matrix.python-version }} - build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }} - cuda-version: ${{ matrix.cuda-version }} - local-ctk: ${{ matrix.local-ctk}} - runner: ${{ matrix.runner }} +# test-linux: +# strategy: +# fail-fast: false +# # TODO: add driver version here +# matrix: +# host-platform: +# - linux-64 +## - linux-aarch64 +# python-version: +# - "3.13" +# - "3.12" +# - "3.11" +# - "3.10" +# - "3.9" +# cuda-version: +# # Note: this is for test-time only. +# - "12.8.0" +# - "12.0.1" +# - "11.8.0" +# local-ctk: +# - 1 # use mini CTK +# - 0 # use CTK wheels +# runner: +# - default +# exclude: +# # To test this combo would require nontrivial installation steps. +# - cuda-version: "12.0.1" +# local-ctk: 0 +# include: +# - host-platform: linux-64 +# python-version: "3.12" +# cuda-version: "12.8.0" +# local-ctk: 1 +# runner: H100 +# name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) +# if: ${{ github.repository_owner == 'nvidia' }} +# permissions: +# contents: read # This is required for actions/checkout +# needs: +# - build +# secrets: inherit +# uses: +# ./.github/workflows/test-wheel-linux.yml +# with: +# host-platform: ${{ matrix.host-platform }} +# python-version: ${{ matrix.python-version }} +# build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }} +# cuda-version: ${{ matrix.cuda-version }} +# local-ctk: ${{ matrix.local-ctk}} +# runner: ${{ matrix.runner }} test-windows: strategy: @@ -284,8 +284,8 @@ jobs: if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout - needs: - - build +# needs: +# - build secrets: inherit uses: ./.github/workflows/test-wheel-windows.yml @@ -319,7 +319,7 @@ jobs: checks: read needs: - build - - test-linux +# - test-linux - test-windows - doc secrets: inherit diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index de878e93c..c21e3711b 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -27,8 +27,12 @@ jobs: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} + defaults: + run: + shell: bash --noprofile --norc -xeuo pipefail {0} steps: - name: Ensure GPU is working + shell: powershell -command ". '{0}'" run: nvidia-smi - name: Checkout ${{ github.event.repository.name }} From 5f0db6d49ad63682018a753b6d76b3ef07591a03 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 10:08:52 -0500 Subject: [PATCH 08/26] add driver update step + start rewriting to ps1 --- .github/workflows/test-wheel-windows.yml | 102 ++++++++++------------- 1 file changed, 45 insertions(+), 57 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index c21e3711b..71329ea87 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -27,12 +27,15 @@ jobs: # The build stage could fail but we want the CI to keep moving. if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} - defaults: - run: - shell: bash --noprofile --norc -xeuo pipefail {0} steps: + # TODO: need to roll our own script... this is from: + # https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#install-script + - name: Update driver + run: | + Invoke-WebRequest https://github.com/GoogleCloudPlatform/compute-gpu-installation/raw/main/windows/install_gpu_driver.ps1 -OutFile C:\install_gpu_driver.ps1 + C:\install_gpu_driver.ps1 + - name: Ensure GPU is working - shell: powershell -command ". '{0}'" run: nvidia-smi - name: Checkout ${{ github.event.repository.name }} @@ -42,33 +45,28 @@ jobs: - name: Set environment variables run: | - PYTHON_VERSION_FORMATTED=$(echo '${{ inputs.python-version }}' | tr -d '.') - if [[ "${{ inputs.host-platform }}" == linux* ]]; then - REPO_DIR=$(pwd) - elif [[ "${{ inputs.host-platform }}" == win* ]]; then - PWD=$(pwd) - REPO_DIR=$(cygpath -w $PWD) - fi - - BUILD_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.build-ctk-ver }})" - TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" - if [[ $BUILD_CUDA_MAJOR != $TEST_CUDA_MAJOR ]]; then - SKIP_CUDA_BINDINGS_TEST=1 - else - SKIP_CUDA_BINDINGS_TEST=0 - fi - - # make outputs from the previous job as env vars - CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" - echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $GITHUB_ENV - echo "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $GITHUB_ENV - echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV - echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_core/dist")" >> $GITHUB_ENV - CUDA_BINDINGS_ARTIFACT_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}" - echo "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $GITHUB_ENV - echo "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $GITHUB_ENV - echo "CUDA_BINDINGS_ARTIFACTS_DIR=$(realpath "$REPO_DIR/cuda_bindings/dist")" >> $GITHUB_ENV - echo "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $GITHUB_ENV + $PYTHON_VERSION_FORMATTED = '${{ inputs.python-version }}' -replace '\.' + $REPO_DIR = $PWD.Path + + $BUILD_CUDA_MAJOR = '${{ inputs.build-ctk-ver }}' -split '\.' | Select-Object -First 1 + $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 + if ($BUILD_CUDA_MAJOR -ne $TEST_CUDA_MAJOR) { + $SKIP_CUDA_BINDINGS_TEST = 1 + } else { + $SKIP_CUDA_BINDINGS_TEST = 0 + } + + # Make outputs from the previous job as env vars + $CUDA_CORE_ARTIFACT_BASENAME = "cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" + "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $env:GITHUB_ENV + "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV + "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV + "CUDA_CORE_ARTIFACTS_DIR=$((Resolve-Path "$REPO_DIR\cuda_core\dist").Path)" >> $env:GITHUB_ENV + $CUDA_BINDINGS_ARTIFACT_BASENAME = "cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}" + "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV + "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV + "CUDA_BINDINGS_ARTIFACTS_DIR=$((Resolve-Path "$REPO_DIR\cuda_bindings\dist").Path)" >> $env:GITHUB_ENV + "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $env:GITHUB_ENV - name: Download cuda-python build artifacts if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} @@ -89,34 +87,24 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | - # See https://github.com/cli/cli/blob/trunk/docs/install_linux.md#debian-ubuntu-linux-raspberry-pi-os-apt. - # gh is needed for artifact fetching. - mkdir -p -m 755 /etc/apt/keyrings \ - && out=$(mktemp) && wget -nv -O$out https://cli.github.com/packages/githubcli-archive-keyring.gpg \ - && cat $out | tee /etc/apt/keyrings/githubcli-archive-keyring.gpg > /dev/null \ - && chmod go+r /etc/apt/keyrings/githubcli-archive-keyring.gpg \ - && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | tee /etc/apt/sources.list.d/github-cli.list > /dev/null \ - && apt update \ - && apt install gh -y - - OLD_BRANCH=$(cat .github/BACKPORT_BRANCH) - OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*" - LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId') - if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then - echo "LATEST_PRIOR_RUN_ID not found!" + $OLD_BRANCH = Get-Content .github/BACKPORT_BRANCH + $OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*" + $LATEST_PRIOR_RUN_ID = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json | Select-Object -ExpandProperty databaseId + if ([string]::IsNullOrEmpty($LATEST_PRIOR_RUN_ID)) { + Write-Host "LATEST_PRIOR_RUN_ID not found!" exit 1 - fi - - gh run download $LATEST_PRIOR_RUN_ID -p ${OLD_BASENAME} -R NVIDIA/cuda-python - ls -al $OLD_BASENAME - mkdir -p "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" - mv $OLD_BASENAME/*.whl "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"/ - rmdir $OLD_BASENAME - + } + + gh run download $LATEST_PRIOR_RUN_ID -p $OLD_BASENAME -R NVIDIA/cuda-python + Get-ChildItem -Path $OLD_BASENAME + New-Item -Path "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" -ItemType Directory -Force + Move-Item -Path "$OLD_BASENAME/*.whl" -Destination "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" + Remove-Item -Path $OLD_BASENAME -Force + gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python - ls -al cuda-python-wheel - mv cuda-python-wheel/*.whl . - rmdir cuda-python-wheel + Get-ChildItem -Path cuda-python-wheel + Move-Item -Path "cuda-python-wheel/*.whl" -Destination . + Remove-Item -Path cuda-python-wheel -Force - name: Display structure of downloaded cuda-python artifacts run: | From 5dff1973df0239925bea974cf595fc3a7bf7626c Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 10:26:35 -0500 Subject: [PATCH 09/26] custom driver installation + debug --- .github/workflows/install_gpu_driver.ps1 | 32 ++++++++++++++++++++++++ .github/workflows/test-wheel-windows.yml | 18 +++++++------ 2 files changed, 42 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/install_gpu_driver.ps1 diff --git a/.github/workflows/install_gpu_driver.ps1 b/.github/workflows/install_gpu_driver.ps1 new file mode 100644 index 000000000..b116462d3 --- /dev/null +++ b/.github/workflows/install_gpu_driver.ps1 @@ -0,0 +1,32 @@ +#Requires -RunAsAdministrator + +# Install the driver +function Install-Driver { + + # Set the correct URL, filename, and arguments to the installer + $url = 'https://us.download.nvidia.com/tesla/539.19/539.19-data-center-tesla-desktop-winserver-2019-2022-dch-international.exe'; + $file_dir = 'C:\NVIDIA-Driver\539.19-data-center-tesla-desktop-winserver-2019-2022-dch-international.exe'; + $install_args = '/s /noeula /noreboot'; + + # Create the folder for the driver download + if (!(Test-Path -Path 'C:\NVIDIA-Driver')) { + New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null + } + + # Download the file to a specified directory + # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29 + $ProgressPreference_tmp = $ProgressPreference + $ProgressPreference = 'SilentlyContinue' + Write-Output 'Downloading the driver installer...' + Invoke-WebRequest $url -OutFile $file_dir + $ProgressPreference = $ProgressPreference_tmp + Write-Output 'Download complete!' + + # Install the file with the specified path from earlier as well as the RunAs admin option + Write-Output 'Running the driver installer...' + Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait + Write-Output 'Done!' +} + +# Run the functions +Install-Driver diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 71329ea87..5a4bb43a9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -28,20 +28,22 @@ jobs: if: ${{ github.repository_owner == 'nvidia' && !cancelled() }} runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }} steps: - # TODO: need to roll our own script... this is from: - # https://cloud.google.com/compute/docs/gpus/install-drivers-gpu#install-script + - name: Checkout ${{ github.event.repository.name }} + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Update driver run: | - Invoke-WebRequest https://github.com/GoogleCloudPlatform/compute-gpu-installation/raw/main/windows/install_gpu_driver.ps1 -OutFile C:\install_gpu_driver.ps1 - C:\install_gpu_driver.ps1 + .github/workflows/install_gpu_driver.ps1 - name: Ensure GPU is working run: nvidia-smi - - name: Checkout ${{ github.event.repository.name }} - uses: actions/checkout@v4 - with: - fetch-depth: 0 + - name: Check repo + run: | + Get-ChildItem -Path . + Get-ChildItem -Path $PWD.Path - name: Set environment variables run: | From 30bfedb3eafdb54704994a57645b04ffa23564cc Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 11:03:53 -0500 Subject: [PATCH 10/26] update driver version for using on different VM --- .github/workflows/install_gpu_driver.ps1 | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/install_gpu_driver.ps1 b/.github/workflows/install_gpu_driver.ps1 index b116462d3..980e64996 100644 --- a/.github/workflows/install_gpu_driver.ps1 +++ b/.github/workflows/install_gpu_driver.ps1 @@ -4,8 +4,9 @@ function Install-Driver { # Set the correct URL, filename, and arguments to the installer - $url = 'https://us.download.nvidia.com/tesla/539.19/539.19-data-center-tesla-desktop-winserver-2019-2022-dch-international.exe'; - $file_dir = 'C:\NVIDIA-Driver\539.19-data-center-tesla-desktop-winserver-2019-2022-dch-international.exe'; + # This driver is picked to support Windows 11 & CUDA 12.8 + $url = 'https://us.download.nvidia.com/tesla/572.13/572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; + $file_dir = 'C:\NVIDIA-Driver\572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe'; $install_args = '/s /noeula /noreboot'; # Create the folder for the driver download From d1e6e574547844b197052af752c192f3c5c5ea74 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 11:24:25 -0500 Subject: [PATCH 11/26] avoid Resolve-Path; add gh install step --- .github/workflows/test-wheel-windows.yml | 31 ++++++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 5a4bb43a9..da99d6842 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -33,12 +33,12 @@ jobs: with: fetch-depth: 0 - - name: Update driver - run: | - .github/workflows/install_gpu_driver.ps1 - - - name: Ensure GPU is working - run: nvidia-smi +# - name: Update driver +# run: | +# .github/workflows/install_gpu_driver.ps1 +# +# - name: Ensure GPU is working +# run: nvidia-smi - name: Check repo run: | @@ -63,11 +63,11 @@ jobs: "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $env:GITHUB_ENV "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV - "CUDA_CORE_ARTIFACTS_DIR=$((Resolve-Path "$REPO_DIR\cuda_core\dist").Path)" >> $env:GITHUB_ENV + "CUDA_CORE_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_core\dist"))" >> $env:GITHUB_ENV $CUDA_BINDINGS_ARTIFACT_BASENAME = "cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}" "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV - "CUDA_BINDINGS_ARTIFACTS_DIR=$((Resolve-Path "$REPO_DIR\cuda_bindings\dist").Path)" >> $env:GITHUB_ENV + "CUDA_BINDINGS_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_bindings\dist"))" >> $env:GITHUB_ENV "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $env:GITHUB_ENV - name: Download cuda-python build artifacts @@ -84,6 +84,21 @@ jobs: name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }} path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }} + - name: Install gh cli + # the GPU runner image does not have gh pre-installed... + env: + # doesn't seem there's an easy way to avoid hard-coding it? + GH_MSI_URL: https://github.com/cli/cli/releases/download/v2.67.0/gh_2.67.0_windows_amd64.msi + run: | + Invoke-WebRequest -Uri "$env:GH_MSI_URL" -OutFile "gh_installer.msi" + Start-Process msiexec.exe -Wait -Verbose -ArgumentList '/i "gh_installer.msi" /qn' + $GH_POSSIBLE_PATHS = "C:\\Program Files\\GitHub CLI", "C:\\Program Files (x86)\\GitHub CLI" + foreach ($p in $GH_POSSIBLE_PATHS) { + echo "$p" >> $env:GITHUB_PATH + $env:Path += ";$p" + } + gh --version + - name: Download cuda-python & cuda.bindings build artifacts from the prior branch if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1'}} env: From f9f0cb33af09b5ccfaff3a684aefa09e3ad9bbbb Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 11:31:12 -0500 Subject: [PATCH 12/26] restore job dependency --- .github/workflows/build-and-test.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index b95feb267..67b262091 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -261,11 +261,7 @@ jobs: host-platform: - win-64 python-version: -# - "3.13" - "3.12" -# - "3.11" -# - "3.10" -# - "3.9" cuda-version: # Note: this is for test-time only. - "12.8.0" @@ -284,8 +280,8 @@ jobs: if: ${{ github.repository_owner == 'nvidia' }} permissions: contents: read # This is required for actions/checkout -# needs: -# - build + needs: + - build secrets: inherit uses: ./.github/workflows/test-wheel-windows.yml From 76e1025e7aecca8d5575c37511178c1e1df7a3b3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 11:56:59 -0500 Subject: [PATCH 13/26] port the remaining steps to PS too --- .github/workflows/test-wheel-windows.yml | 76 +++++++++++++----------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index da99d6842..bb9641082 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -125,13 +125,13 @@ jobs: - name: Display structure of downloaded cuda-python artifacts run: | - pwd - ls -lahR . + Get-Location + Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName - name: Display structure of downloaded cuda.bindings artifacts run: | - pwd - ls -lahR $CUDA_BINDINGS_ARTIFACTS_DIR + Get-Location + Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName - name: Download cuda.core build artifacts uses: actions/download-artifact@v4 @@ -141,16 +141,16 @@ jobs: - name: Display structure of downloaded cuda.core build artifacts run: | - pwd - ls -lahR $CUDA_CORE_ARTIFACTS_DIR + Get-Location + Get-ChildItem -Recurse -Force $env:CUDA_CORE_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ inputs.python-version }} - env: - # we use self-hosted runners on which setup-python behaves weirdly... - AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" +# env: +# # we use self-hosted runners on which setup-python behaves weirdly... +# AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" - name: Set up mini CTK if: ${{ inputs.local-ctk == '1' }} @@ -163,31 +163,20 @@ jobs: - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | - pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" - if [[ "${{ inputs.local-ctk }}" == 1 ]]; then - ls $CUDA_PATH + Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR + if ('${{ inputs.local-ctk }}' -eq '1') { + Get-ChildItem $env:CUDA_PATH pip install *.whl - else - pip install $(ls *.whl)[all] - fi - popd + } else { + pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all] + } + Pop-Location - pushd ./cuda_bindings + Push-Location ./cuda_bindings pip install -r requirements.txt pytest -rxXs tests/ - - # It is a bit convoluted to run the Cython tests against CTK wheels, - # so let's just skip them. - if [[ "${{ inputs.local-ctk }}" == 1 ]]; then - if [[ "${{ inputs.host-platform }}" == linux* ]]; then - bash tests/cython/build_tests.sh - elif [[ "${{ inputs.host-platform }}" == win* ]]; then - # TODO: enable this once win-64 runners are up - exit 1 - fi - pytest -rxXs tests/cython - popd - fi + # skip Cython tests for now + Pop-Location - name: Run cuda.core tests run: | @@ -212,10 +201,29 @@ jobs: pytest -rxXs tests/ popd + if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') { + Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR + if ('${{ inputs.local-ctk }}' -eq '1') { + pip install *.whl + } else { + pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all] + } + Pop-Location + } + $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 + Push-Location $env:CUDA_CORE_ARTIFACTS_DIR + pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[cu${TEST_CUDA_MAJOR}] + Pop-Location + + Push-Location ./cuda_core + pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" + pytest -rxXs tests/ + Pop-Location + - name: Ensure cuda-python installable run: | - if [[ "${{ inputs.local-ctk }}" == 1 ]]; then + if ('${{ inputs.local-ctk }}' -eq '1') { pip install cuda_python*.whl - else - pip install $(ls cuda_python*.whl)[all] - fi + } else { + pip install (Get-ChildItem -Filter cuda_python*.whl | Select-Object -ExpandProperty Name -First 1)[all] + } From f82c89307d0b34ea7f72e756237d8b64441ea9e5 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 12:10:55 -0500 Subject: [PATCH 14/26] try to fix extras --- .github/workflows/test-wheel-windows.yml | 25 +++--------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index bb9641082..524afea9b 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -168,7 +168,7 @@ jobs: Get-ChildItem $env:CUDA_PATH pip install *.whl } else { - pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all] + pip install "(Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all]" } Pop-Location @@ -182,31 +182,12 @@ jobs: run: | # If build/test majors match: cuda.bindings is installed in the previous step. # If mismatch: cuda.bindings is installed from the backport branch. - if [[ "${SKIP_CUDA_BINDINGS_TEST}" == 1 ]]; then - pushd "${CUDA_BINDINGS_ARTIFACTS_DIR}" - if [[ "${{ inputs.local-ctk }}" == 1 ]]; then - pip install *.whl - else - pip install $(ls *.whl)[all] - fi - popd - fi - TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${{ inputs.cuda-version }})" - pushd "${CUDA_CORE_ARTIFACTS_DIR}" - pip install $(ls *.whl)["cu${TEST_CUDA_MAJOR}"] - popd - - pushd ./cuda_core - pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt" - pytest -rxXs tests/ - popd - if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') { Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { pip install *.whl } else { - pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all] + pip install "(Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all]" } Pop-Location } @@ -225,5 +206,5 @@ jobs: if ('${{ inputs.local-ctk }}' -eq '1') { pip install cuda_python*.whl } else { - pip install (Get-ChildItem -Filter cuda_python*.whl | Select-Object -ExpandProperty Name -First 1)[all] + pip install "(Get-ChildItem -Filter cuda_python*.whl | Select-Object -ExpandProperty Name -First 1)[all]" } From f9807bc82272cc41406af9796f0ba92bc6ad0872 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 13:25:11 -0500 Subject: [PATCH 15/26] avoid using our own fetch_ctk since it's using bash --- .github/workflows/test-wheel-windows.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 524afea9b..5408e9c4a 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -154,11 +154,16 @@ jobs: - name: Set up mini CTK if: ${{ inputs.local-ctk == '1' }} - uses: ./.github/actions/fetch_ctk - continue-on-error: false + uses: Jimver/cuda-toolkit@0.2.21 with: - host-platform: ${{ inputs.host-platform }} - cuda-version: ${{ inputs.cuda-version }} + cuda: ${{ inputs.cuda-version }} + method: 'network' + sub-packages: ["nvcc", "nvrtc", "nvjitlink"] +# uses: ./.github/actions/fetch_ctk +# continue-on-error: false +# with: +# host-platform: ${{ inputs.host-platform }} +# cuda-version: ${{ inputs.cuda-version }} - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} From a8d3c4f1e1515dd5d2a9c2192fbc393a02437021 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 13:32:24 -0500 Subject: [PATCH 16/26] fix typo --- .github/workflows/test-wheel-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 5408e9c4a..54adf2fff 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -158,7 +158,7 @@ jobs: with: cuda: ${{ inputs.cuda-version }} method: 'network' - sub-packages: ["nvcc", "nvrtc", "nvjitlink"] + sub-packages: '["nvcc", "nvrtc", "nvjitlink"]' # uses: ./.github/actions/fetch_ctk # continue-on-error: false # with: From 9fcb968acb4d626db5b7eed25c57c55bf15d2198 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 13:42:34 -0500 Subject: [PATCH 17/26] fix typo again --- .github/workflows/test-wheel-windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 54adf2fff..63e21a1c7 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -154,7 +154,7 @@ jobs: - name: Set up mini CTK if: ${{ inputs.local-ctk == '1' }} - uses: Jimver/cuda-toolkit@0.2.21 + uses: Jimver/cuda-toolkit@v0.2.21 with: cuda: ${{ inputs.cuda-version }} method: 'network' From c1e85646fe1ce0b0e0274d7e31ed84b5498f25d3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 14:25:09 -0500 Subject: [PATCH 18/26] fixes --- .github/workflows/test-wheel-windows.yml | 30 ++++++++++-------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 63e21a1c7..3c519f4a9 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -40,11 +40,6 @@ jobs: # - name: Ensure GPU is working # run: nvidia-smi - - name: Check repo - run: | - Get-ChildItem -Path . - Get-ChildItem -Path $PWD.Path - - name: Set environment variables run: | $PYTHON_VERSION_FORMATTED = '${{ inputs.python-version }}' -replace '\.' @@ -106,12 +101,13 @@ jobs: run: | $OLD_BRANCH = Get-Content .github/BACKPORT_BRANCH $OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*" - $LATEST_PRIOR_RUN_ID = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json | Select-Object -ExpandProperty databaseId - if ([string]::IsNullOrEmpty($LATEST_PRIOR_RUN_ID)) { - Write-Host "LATEST_PRIOR_RUN_ID not found!" - exit 1 + $runData = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json + if (-not $runData -or $runData.Length -eq 0 -or -not $runData[0].databaseId -or [string]::IsNullOrEmpty($runData[0].databaseId)) { + Write-Host "LATEST_PRIOR_RUN_ID not found!" + exit 1 } - + $LATEST_PRIOR_RUN_ID = $runData[0].databaseId + gh run download $LATEST_PRIOR_RUN_ID -p $OLD_BASENAME -R NVIDIA/cuda-python Get-ChildItem -Path $OLD_BASENAME New-Item -Path "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" -ItemType Directory -Force @@ -171,9 +167,9 @@ jobs: Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { Get-ChildItem $env:CUDA_PATH - pip install *.whl + pip install (Get-ChildItem -Filter *.whl).FullName } else { - pip install "(Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all]" + pip install "((Get-ChildItem -Filter *.whl).FullName)[all]" } Pop-Location @@ -190,15 +186,15 @@ jobs: if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') { Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { - pip install *.whl + pip install (Get-ChildItem -Filter *.whl).FullName } else { - pip install "(Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[all]" + pip install "((Get-ChildItem -Filter *.whl).FullName)[all]" } Pop-Location } $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 Push-Location $env:CUDA_CORE_ARTIFACTS_DIR - pip install (Get-ChildItem -Filter *.whl | Select-Object -ExpandProperty Name -First 1)[cu${TEST_CUDA_MAJOR}] + pip install "((Get-ChildItem -Filter *.whl).FullName)[cu${TEST_CUDA_MAJOR}]" Pop-Location Push-Location ./cuda_core @@ -209,7 +205,7 @@ jobs: - name: Ensure cuda-python installable run: | if ('${{ inputs.local-ctk }}' -eq '1') { - pip install cuda_python*.whl + pip install (Get-ChildItem -Filter cuda_python*.whl).FullName } else { - pip install "(Get-ChildItem -Filter cuda_python*.whl | Select-Object -ExpandProperty Name -First 1)[all]" + pip install "((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]" } From ec9be1981eec505c2b08552f8cc86e6bd85185f3 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 14:43:22 -0500 Subject: [PATCH 19/26] force evaluation --- .github/workflows/test-wheel-windows.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 3c519f4a9..7ed2899cc 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -169,7 +169,7 @@ jobs: Get-ChildItem $env:CUDA_PATH pip install (Get-ChildItem -Filter *.whl).FullName } else { - pip install "((Get-ChildItem -Filter *.whl).FullName)[all]" + pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]" } Pop-Location @@ -188,13 +188,13 @@ jobs: if ('${{ inputs.local-ctk }}' -eq '1') { pip install (Get-ChildItem -Filter *.whl).FullName } else { - pip install "((Get-ChildItem -Filter *.whl).FullName)[all]" + pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]" } Pop-Location } $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1 Push-Location $env:CUDA_CORE_ARTIFACTS_DIR - pip install "((Get-ChildItem -Filter *.whl).FullName)[cu${TEST_CUDA_MAJOR}]" + pip install "$((Get-ChildItem -Filter *.whl).FullName)[cu${TEST_CUDA_MAJOR}]" Pop-Location Push-Location ./cuda_core @@ -207,5 +207,5 @@ jobs: if ('${{ inputs.local-ctk }}' -eq '1') { pip install (Get-ChildItem -Filter cuda_python*.whl).FullName } else { - pip install "((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]" + pip install "$((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]" } From 87c68e3c26dee26f89b6cfc212d6185c6ba9ae6b Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 14:44:34 -0500 Subject: [PATCH 20/26] resume driver install --- .github/workflows/test-wheel-windows.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 7ed2899cc..002868e22 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -33,12 +33,12 @@ jobs: with: fetch-depth: 0 -# - name: Update driver -# run: | -# .github/workflows/install_gpu_driver.ps1 -# -# - name: Ensure GPU is working -# run: nvidia-smi + - name: Update driver + run: | + .github/workflows/install_gpu_driver.ps1 + + - name: Ensure GPU is working + run: nvidia-smi - name: Set environment variables run: | From 367050bdc0b18f275c0d190e939e30818d09b35f Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 15:13:16 -0500 Subject: [PATCH 21/26] clean up and add MINI_CTK_DEPS --- .github/workflows/build-and-test.yml | 5 ----- .github/workflows/test-wheel-windows.yml | 21 ++++++++++++--------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 67b262091..a35059c81 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -265,17 +265,12 @@ jobs: cuda-version: # Note: this is for test-time only. - "12.8.0" -# - "12.0.1" - "11.8.0" local-ctk: - 1 # use mini CTK - 0 # use CTK wheels runner: - default - exclude: - # To test this combo would require nontrivial installation steps. - - cuda-version: "12.0.1" - local-ctk: 0 name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) if: ${{ github.repository_owner == 'nvidia' }} permissions: diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 002868e22..b3632d095 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -53,6 +53,14 @@ jobs: $SKIP_CUDA_BINDINGS_TEST = 0 } + if ('${{ inputs.local-ctk }}' -eq '1') { + if ($TEST_CUDA_MAJOR -eq '12') { + $MINI_CTK_DEPS = '["nvcc", "nvrtc", "nvjitlink"]' + } else { + $MINI_CTK_DEPS = '["nvcc", "nvrtc"]' + } + } + # Make outputs from the previous job as env vars $CUDA_CORE_ARTIFACT_BASENAME = "cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}" "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $env:GITHUB_ENV @@ -64,6 +72,7 @@ jobs: "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV "CUDA_BINDINGS_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_bindings\dist"))" >> $env:GITHUB_ENV "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $env:GITHUB_ENV + "MINI_CTK_DEPS=${MINI_CTK_DEPS}" >> $env:GITHUB_ENV - name: Download cuda-python build artifacts if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0'}} @@ -144,22 +153,16 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ inputs.python-version }} -# env: -# # we use self-hosted runners on which setup-python behaves weirdly... -# AGENT_TOOLSDIRECTORY: "/opt/hostedtoolcache" - name: Set up mini CTK if: ${{ inputs.local-ctk == '1' }} + # Note: The GH-hosted Windows GPU runner does not have Git for Windows pre-installed, + # so we cannot use our own fetch_ctk action unfortunately... uses: Jimver/cuda-toolkit@v0.2.21 with: cuda: ${{ inputs.cuda-version }} method: 'network' - sub-packages: '["nvcc", "nvrtc", "nvjitlink"]' -# uses: ./.github/actions/fetch_ctk -# continue-on-error: false -# with: -# host-platform: ${{ inputs.host-platform }} -# cuda-version: ${{ inputs.cuda-version }} + sub-packages: ${{ env.MINI_CTK_DEPS }} - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} From ee0be1154ad80a8b2d2b813a9a1273bc850c2d4a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 16:00:40 -0500 Subject: [PATCH 22/26] fix nvvm tests with local CTK --- .github/workflows/test-wheel-windows.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index b3632d095..73a60bb90 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -164,6 +164,12 @@ jobs: method: 'network' sub-packages: ${{ env.MINI_CTK_DEPS }} + - name: Update PATH + if: ${{ inputs.local-ctk == '1' }} + run: | + # mimics actual CTK installation + "${CUDA_PATH}\\nvvm\\bin" >> $env:GITHUB_PATH + - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} run: | From b677dbd44e182acddfd8310dca80eb3a3ac06860 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 16:29:49 -0500 Subject: [PATCH 23/26] debug --- .github/workflows/test-wheel-windows.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 73a60bb90..d14dc7b1f 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -168,7 +168,8 @@ jobs: if: ${{ inputs.local-ctk == '1' }} run: | # mimics actual CTK installation - "${CUDA_PATH}\\nvvm\\bin" >> $env:GITHUB_PATH + echo $PATH + echo "${CUDA_PATH}\\nvvm\\bin" >> $env:GITHUB_PATH - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} @@ -176,6 +177,9 @@ jobs: Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { Get-ChildItem $env:CUDA_PATH + Get-ChildItem "${CUDA_PATH}\\bin" + Get-ChildItem "${CUDA_PATH}\\nvvm\\bin" + echo $PATH pip install (Get-ChildItem -Filter *.whl).FullName } else { pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]" From eb224ece7708edb2a1125294dc3fb14227d9fc8a Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 16:51:07 -0500 Subject: [PATCH 24/26] it does not seem we need to escape --- .github/workflows/test-wheel-windows.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index d14dc7b1f..1cd1f90c3 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -169,7 +169,7 @@ jobs: run: | # mimics actual CTK installation echo $PATH - echo "${CUDA_PATH}\\nvvm\\bin" >> $env:GITHUB_PATH + echo "${CUDA_PATH}\nvvm\bin" >> $env:GITHUB_PATH - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} @@ -177,8 +177,8 @@ jobs: Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { Get-ChildItem $env:CUDA_PATH - Get-ChildItem "${CUDA_PATH}\\bin" - Get-ChildItem "${CUDA_PATH}\\nvvm\\bin" + Get-ChildItem "${CUDA_PATH}\bin" + Get-ChildItem "${CUDA_PATH}\nvvm\bin" echo $PATH pip install (Get-ChildItem -Filter *.whl).FullName } else { From 792745bb6db7434de8d662d7eea4afcdc68b7978 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 17:11:06 -0500 Subject: [PATCH 25/26] fix again... --- .github/workflows/test-wheel-windows.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml index 1cd1f90c3..2312085df 100644 --- a/.github/workflows/test-wheel-windows.yml +++ b/.github/workflows/test-wheel-windows.yml @@ -169,7 +169,7 @@ jobs: run: | # mimics actual CTK installation echo $PATH - echo "${CUDA_PATH}\nvvm\bin" >> $env:GITHUB_PATH + echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH - name: Run cuda.bindings tests if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }} @@ -177,8 +177,6 @@ jobs: Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR if ('${{ inputs.local-ctk }}' -eq '1') { Get-ChildItem $env:CUDA_PATH - Get-ChildItem "${CUDA_PATH}\bin" - Get-ChildItem "${CUDA_PATH}\nvvm\bin" echo $PATH pip install (Get-ChildItem -Filter *.whl).FullName } else { From 176c94d09f2f29675f822ac986cd72aae84d1e76 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Wed, 19 Feb 2025 17:39:15 -0500 Subject: [PATCH 26/26] clean up for review --- .github/workflows/build-and-test.yml | 114 +++++++++++++-------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index a35059c81..9ad2d8c1e 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -20,15 +20,15 @@ jobs: fail-fast: false matrix: host-platform: -# - linux-64 -# - linux-aarch64 + - linux-64 + - linux-aarch64 - win-64 python-version: -# - "3.13" + - "3.13" - "3.12" -# - "3.11" -# - "3.10" -# - "3.9" + - "3.11" + - "3.10" + - "3.9" cuda-version: # Note: this is for build-time only. - "12.8.0" @@ -202,56 +202,56 @@ jobs: run: | echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT -# test-linux: -# strategy: -# fail-fast: false -# # TODO: add driver version here -# matrix: -# host-platform: -# - linux-64 -## - linux-aarch64 -# python-version: -# - "3.13" -# - "3.12" -# - "3.11" -# - "3.10" -# - "3.9" -# cuda-version: -# # Note: this is for test-time only. -# - "12.8.0" -# - "12.0.1" -# - "11.8.0" -# local-ctk: -# - 1 # use mini CTK -# - 0 # use CTK wheels -# runner: -# - default -# exclude: -# # To test this combo would require nontrivial installation steps. -# - cuda-version: "12.0.1" -# local-ctk: 0 -# include: -# - host-platform: linux-64 -# python-version: "3.12" -# cuda-version: "12.8.0" -# local-ctk: 1 -# runner: H100 -# name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) -# if: ${{ github.repository_owner == 'nvidia' }} -# permissions: -# contents: read # This is required for actions/checkout -# needs: -# - build -# secrets: inherit -# uses: -# ./.github/workflows/test-wheel-linux.yml -# with: -# host-platform: ${{ matrix.host-platform }} -# python-version: ${{ matrix.python-version }} -# build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }} -# cuda-version: ${{ matrix.cuda-version }} -# local-ctk: ${{ matrix.local-ctk}} -# runner: ${{ matrix.runner }} + test-linux: + strategy: + fail-fast: false + # TODO: add driver version here + matrix: + host-platform: + - linux-64 + - linux-aarch64 + python-version: + - "3.13" + - "3.12" + - "3.11" + - "3.10" + - "3.9" + cuda-version: + # Note: this is for test-time only. + - "12.8.0" + - "12.0.1" + - "11.8.0" + local-ctk: + - 1 # use mini CTK + - 0 # use CTK wheels + runner: + - default + exclude: + # To test this combo would require nontrivial installation steps. + - cuda-version: "12.0.1" + local-ctk: 0 + include: + - host-platform: linux-64 + python-version: "3.12" + cuda-version: "12.8.0" + local-ctk: 1 + runner: H100 + name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }}) + if: ${{ github.repository_owner == 'nvidia' }} + permissions: + contents: read # This is required for actions/checkout + needs: + - build + secrets: inherit + uses: + ./.github/workflows/test-wheel-linux.yml + with: + host-platform: ${{ matrix.host-platform }} + python-version: ${{ matrix.python-version }} + build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }} + cuda-version: ${{ matrix.cuda-version }} + local-ctk: ${{ matrix.local-ctk}} + runner: ${{ matrix.runner }} test-windows: strategy: @@ -310,7 +310,7 @@ jobs: checks: read needs: - build -# - test-linux + - test-linux - test-windows - doc secrets: inherit