diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index feafb0c87..9ad2d8c1e 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -202,7 +202,7 @@ jobs:
         run: |
           echo "CUDA_VERSION=${{ matrix.cuda-version }}" >> $GITHUB_OUTPUT
 
-  test:
+  test-linux:
     strategy:
       fail-fast: false
       # TODO: add driver version here
@@ -210,8 +210,6 @@
         host-platform:
           - linux-64
           - linux-aarch64
-          # TODO: enable testing once win-64 GPU runners are up
-          # - win-64
         python-version:
           - "3.13"
           - "3.12"
@@ -246,7 +244,42 @@
       - build
     secrets: inherit
     uses:
-      ./.github/workflows/test-wheel.yml
+      ./.github/workflows/test-wheel-linux.yml
+    with:
+      host-platform: ${{ matrix.host-platform }}
+      python-version: ${{ matrix.python-version }}
+      build-ctk-ver: ${{ needs.build.outputs.BUILD_CTK_VER }}
+      cuda-version: ${{ matrix.cuda-version }}
+      local-ctk: ${{ matrix.local-ctk }}
+      runner: ${{ matrix.runner }}
+
+  test-windows:
+    strategy:
+      fail-fast: false
+      # TODO: add driver version here
+      matrix:
+        host-platform:
+          - win-64
+        python-version:
+          - "3.12"
+        cuda-version:
+          # Note: this is for test-time only.
+          - "12.8.0"
+          - "11.8.0"
+        local-ctk:
+          - 1 # use mini CTK
+          - 0 # use CTK wheels
+        runner:
+          - default
+    name: Test (${{ matrix.host-platform }}, Python ${{ matrix.python-version }}, CUDA ${{ matrix.cuda-version }}, Runner ${{ matrix.runner }}, ${{ (matrix.local-ctk == '1' && 'local CTK') || 'CTK wheels' }})
+    if: ${{ github.repository_owner == 'nvidia' }}
+    permissions:
+      contents: read  # This is required for actions/checkout
+    needs:
+      - build
+    secrets: inherit
+    uses:
+      ./.github/workflows/test-wheel-windows.yml
     with:
       host-platform: ${{ matrix.host-platform }}
       python-version: ${{ matrix.python-version }}
@@ -277,7 +310,8 @@
       checks: read
     needs:
       - build
-      - test
+      - test-linux
+      - test-windows
       - doc
     secrets: inherit
     uses:
diff --git a/.github/workflows/install_gpu_driver.ps1 b/.github/workflows/install_gpu_driver.ps1
new file mode 100644
index 000000000..980e64996
--- /dev/null
+++ b/.github/workflows/install_gpu_driver.ps1
@@ -0,0 +1,33 @@
+#Requires -RunAsAdministrator
+
+# Install the driver
+function Install-Driver {
+
+    # Set the URL, local filename, and arguments for the installer
+    # This driver is picked to support Windows 11 & CUDA 12.8
+    $url = 'https://us.download.nvidia.com/tesla/572.13/572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
+    $file_dir = 'C:\NVIDIA-Driver\572.13-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe';
+    $install_args = '/s /noeula /noreboot';
+
+    # Create the folder for the driver download
+    if (!(Test-Path -Path 'C:\NVIDIA-Driver')) {
+        New-Item -Path 'C:\' -Name 'NVIDIA-Driver' -ItemType 'directory' | Out-Null
+    }
+
+    # Download the installer into the directory created above
+    # Disabling progress bar due to https://github.com/GoogleCloudPlatform/compute-gpu-installation/issues/29
+    $ProgressPreference_tmp = $ProgressPreference
+    $ProgressPreference = 'SilentlyContinue'
+    Write-Output 'Downloading the driver installer...'
+    Invoke-WebRequest $url -OutFile $file_dir
+    $ProgressPreference = $ProgressPreference_tmp
+    Write-Output 'Download complete!'
+
+    # Run the installer silently from the path above (the script must already be elevated; see #Requires)
+    Write-Output 'Running the driver installer...'
+    Start-Process -FilePath $file_dir -ArgumentList $install_args -Wait
+    Write-Output 'Done!'
+}
+
+# Run the function
+Install-Driver
diff --git a/.github/workflows/test-wheel.yml b/.github/workflows/test-wheel-linux.yml
similarity index 100%
rename from .github/workflows/test-wheel.yml
rename to .github/workflows/test-wheel-linux.yml
diff --git a/.github/workflows/test-wheel-windows.yml b/.github/workflows/test-wheel-windows.yml
new file mode 100644
index 000000000..2312085df
--- /dev/null
+++ b/.github/workflows/test-wheel-windows.yml
@@ -0,0 +1,222 @@
+name: "CI: Test wheels"
+
+on:
+  workflow_call:
+    inputs:
+      host-platform:
+        type: string
+        required: true
+      python-version:
+        type: string
+        required: true
+      build-ctk-ver:
+        type: string
+        required: true
+      cuda-version:
+        type: string
+        required: true
+      local-ctk:
+        type: string
+        required: true
+      runner:
+        type: string
+        required: true
+
+jobs:
+  test:
+    # The build stage could fail but we want the CI to keep moving.
+    if: ${{ github.repository_owner == 'nvidia' && !cancelled() }}
+    runs-on: ${{ (inputs.runner == 'default' && inputs.host-platform == 'win-64' && 'cuda-python-windows-gpu-github') }}
+    steps:
+      - name: Checkout ${{ github.event.repository.name }}
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Update driver
+        run: |
+          .github/workflows/install_gpu_driver.ps1
+
+      - name: Ensure GPU is working
+        run: nvidia-smi
+
+      - name: Set environment variables
+        run: |
+          $PYTHON_VERSION_FORMATTED = '${{ inputs.python-version }}' -replace '\.'
+          $REPO_DIR = $PWD.Path
+
+          $BUILD_CUDA_MAJOR = '${{ inputs.build-ctk-ver }}' -split '\.' | Select-Object -First 1
+          $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1
+          if ($BUILD_CUDA_MAJOR -ne $TEST_CUDA_MAJOR) {
+            $SKIP_CUDA_BINDINGS_TEST = 1
+          } else {
+            $SKIP_CUDA_BINDINGS_TEST = 0
+          }
+
+          if ('${{ inputs.local-ctk }}' -eq '1') {
+            if ($TEST_CUDA_MAJOR -eq '12') {
+              $MINI_CTK_DEPS = '["nvcc", "nvrtc", "nvjitlink"]'
+            } else {
+              $MINI_CTK_DEPS = '["nvcc", "nvrtc"]'
+            }
+          }
+
+          # Make outputs from the previous job available as env vars
+          $CUDA_CORE_ARTIFACT_BASENAME = "cuda-core-python${PYTHON_VERSION_FORMATTED}-${{ inputs.host-platform }}"
+          "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}" >> $env:GITHUB_ENV
+          "CUDA_CORE_ARTIFACT_BASENAME=${CUDA_CORE_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV
+          "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV
+          "CUDA_CORE_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_core\dist"))" >> $env:GITHUB_ENV
+          $CUDA_BINDINGS_ARTIFACT_BASENAME = "cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda${{ inputs.build-ctk-ver }}-${{ inputs.host-platform }}"
+          "CUDA_BINDINGS_ARTIFACT_BASENAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}" >> $env:GITHUB_ENV
+          "CUDA_BINDINGS_ARTIFACT_NAME=${CUDA_BINDINGS_ARTIFACT_BASENAME}-${{ github.sha }}" >> $env:GITHUB_ENV
+          "CUDA_BINDINGS_ARTIFACTS_DIR=$($ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath("$REPO_DIR\cuda_bindings\dist"))" >> $env:GITHUB_ENV
+          "SKIP_CUDA_BINDINGS_TEST=${SKIP_CUDA_BINDINGS_TEST}" >> $env:GITHUB_ENV
+          "MINI_CTK_DEPS=${MINI_CTK_DEPS}" >> $env:GITHUB_ENV
+
+      - name: Download cuda-python build artifacts
+        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: cuda-python-wheel
+          path: .
+
+      - name: Download cuda.bindings build artifacts
+        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ env.CUDA_BINDINGS_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}
+
+      - name: Install gh cli
+        # the GPU runner image does not have gh pre-installed...
+        env:
+          # there doesn't seem to be an easy way to avoid hard-coding this URL
+          GH_MSI_URL: https://github.com/cli/cli/releases/download/v2.67.0/gh_2.67.0_windows_amd64.msi
+        run: |
+          Invoke-WebRequest -Uri "$env:GH_MSI_URL" -OutFile "gh_installer.msi"
+          Start-Process msiexec.exe -Wait -Verbose -ArgumentList '/i "gh_installer.msi" /qn'
+          $GH_POSSIBLE_PATHS = "C:\\Program Files\\GitHub CLI", "C:\\Program Files (x86)\\GitHub CLI"
+          foreach ($p in $GH_POSSIBLE_PATHS) {
+            echo "$p" >> $env:GITHUB_PATH
+            $env:Path += ";$p"
+          }
+          gh --version
+
+      - name: Download cuda-python & cuda.bindings build artifacts from the prior branch
+        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '1' }}
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          $OLD_BRANCH = Get-Content .github/BACKPORT_BRANCH
+          $OLD_BASENAME = "cuda-bindings-python${env:PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
+          $runData = gh run list -b $OLD_BRANCH -L 1 -w "CI: Build and test" -s completed -R NVIDIA/cuda-python --json databaseId | ConvertFrom-Json
+          if (-not $runData -or $runData.Length -eq 0 -or -not $runData[0].databaseId -or [string]::IsNullOrEmpty($runData[0].databaseId)) {
+            Write-Host "LATEST_PRIOR_RUN_ID not found!"
+            exit 1
+          }
+          $LATEST_PRIOR_RUN_ID = $runData[0].databaseId
+
+          gh run download $LATEST_PRIOR_RUN_ID -p $OLD_BASENAME -R NVIDIA/cuda-python
+          Get-ChildItem -Path $OLD_BASENAME
+          New-Item -Path "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}" -ItemType Directory -Force
+          Move-Item -Path "$OLD_BASENAME/*.whl" -Destination "${{ env.CUDA_BINDINGS_ARTIFACTS_DIR }}"
+          Remove-Item -Path $OLD_BASENAME -Force
+
+          gh run download $LATEST_PRIOR_RUN_ID -p cuda-python-wheel -R NVIDIA/cuda-python
+          Get-ChildItem -Path cuda-python-wheel
+          Move-Item -Path "cuda-python-wheel/*.whl" -Destination .
+          Remove-Item -Path cuda-python-wheel -Force
+
+      - name: Display structure of downloaded cuda-python artifacts
+        run: |
+          Get-Location
+          Get-ChildItem -Recurse -Force | Select-Object Mode, LastWriteTime, Length, FullName
+
+      - name: Display structure of downloaded cuda.bindings artifacts
+        run: |
+          Get-Location
+          Get-ChildItem -Recurse -Force $env:CUDA_BINDINGS_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName
+
+      - name: Download cuda.core build artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}
+          path: ${{ env.CUDA_CORE_ARTIFACTS_DIR }}
+
+      - name: Display structure of downloaded cuda.core build artifacts
+        run: |
+          Get-Location
+          Get-ChildItem -Recurse -Force $env:CUDA_CORE_ARTIFACTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName
+
+      - name: Set up Python ${{ inputs.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ inputs.python-version }}
+
+      - name: Set up mini CTK
+        if: ${{ inputs.local-ctk == '1' }}
+        # Note: The GH-hosted Windows GPU runner does not have Git for Windows pre-installed,
+        # so we cannot use our own fetch_ctk action unfortunately...
+        uses: Jimver/cuda-toolkit@v0.2.21
+        with:
+          cuda: ${{ inputs.cuda-version }}
+          method: 'network'
+          sub-packages: ${{ env.MINI_CTK_DEPS }}
+
+      - name: Update PATH
+        if: ${{ inputs.local-ctk == '1' }}
+        run: |
+          # mimics actual CTK installation
+          echo $env:PATH
+          echo "$env:CUDA_PATH\nvvm\bin" >> $env:GITHUB_PATH
+
+      - name: Run cuda.bindings tests
+        if: ${{ env.SKIP_CUDA_BINDINGS_TEST == '0' }}
+        run: |
+          Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR
+          if ('${{ inputs.local-ctk }}' -eq '1') {
+            Get-ChildItem $env:CUDA_PATH
+            echo $env:PATH
+            pip install (Get-ChildItem -Filter *.whl).FullName
+          } else {
+            pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]"
+          }
+          Pop-Location
+
+          Push-Location ./cuda_bindings
+          pip install -r requirements.txt
+          pytest -rxXs tests/
+          # skip Cython tests for now
+          Pop-Location
+
+      - name: Run cuda.core tests
+        run: |
+          # If the build/test CUDA majors match, cuda.bindings was installed in the previous step.
+          # If they mismatch, install the cuda.bindings wheel downloaded from the backport branch.
+          if ($env:SKIP_CUDA_BINDINGS_TEST -eq '1') {
+            Push-Location $env:CUDA_BINDINGS_ARTIFACTS_DIR
+            if ('${{ inputs.local-ctk }}' -eq '1') {
+              pip install (Get-ChildItem -Filter *.whl).FullName
+            } else {
+              pip install "$((Get-ChildItem -Filter *.whl).FullName)[all]"
+            }
+            Pop-Location
+          }
+          $TEST_CUDA_MAJOR = '${{ inputs.cuda-version }}' -split '\.' | Select-Object -First 1
+          Push-Location $env:CUDA_CORE_ARTIFACTS_DIR
+          pip install "$((Get-ChildItem -Filter *.whl).FullName)[cu${TEST_CUDA_MAJOR}]"
+          Pop-Location
+
+          Push-Location ./cuda_core
+          pip install -r "tests/requirements-cu${TEST_CUDA_MAJOR}.txt"
+          pytest -rxXs tests/
+          Pop-Location
+
+      - name: Ensure cuda-python is installable
+        run: |
+          if ('${{ inputs.local-ctk }}' -eq '1') {
+            pip install (Get-ChildItem -Filter cuda_python*.whl).FullName
+          } else {
+            pip install "$((Get-ChildItem -Filter cuda_python*.whl).FullName)[all]"
+          }
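
For reference, a minimal standalone pwsh sketch of the SKIP_CUDA_BINDINGS_TEST gating used in the "Set environment variables" step above. This is not part of the patch; the literal versions below are stand-ins for inputs.build-ctk-ver and inputs.cuda-version.

# Illustrative sketch only: mirrors the major-version comparison in the workflow.
$BUILD_CTK_VER = '12.8.0'   # stand-in for inputs.build-ctk-ver
$TEST_CUDA_VER = '11.8.0'   # stand-in for inputs.cuda-version
$BUILD_CUDA_MAJOR = $BUILD_CTK_VER -split '\.' | Select-Object -First 1
$TEST_CUDA_MAJOR = $TEST_CUDA_VER -split '\.' | Select-Object -First 1
# On a major mismatch the bindings wheel built in this run is not tested;
# the workflow instead downloads the wheel from the backport branch.
if ($BUILD_CUDA_MAJOR -ne $TEST_CUDA_MAJOR) { $SKIP_CUDA_BINDINGS_TEST = 1 } else { $SKIP_CUDA_BINDINGS_TEST = 0 }
Write-Output "SKIP_CUDA_BINDINGS_TEST=$SKIP_CUDA_BINDINGS_TEST"   # prints 1 for 12.x vs 11.x

The same flag drives the conditional "Download ... from the prior branch" and "Run cuda.bindings tests" steps in test-wheel-windows.yml.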