- 
                Notifications
    You must be signed in to change notification settings 
- Fork 6.5k
[CI] add a big GPU marker to run memory-intensive tests separately on CI #9691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 37 commits
32e23d8
              da92ca0
              219a3cc
              c679563
              a0bae4b
              95f396e
              02f0aa3
              9441016
              15d1127
              6c82fd4
              676b8a5
              3b50732
              9ef5435
              4ff06b4
              46cab82
              2b25688
              b0568da
              928dd73
              9020d8f
              2732720
              f265f7d
              1755305
              fcb57ae
              6f477ac
              ff47576
              1ad8c64
              605a21d
              9e1cacb
              0704d9a
              c9fd1ab
              f8086f6
              e31b0bd
              cf280ba
              5b9c771
              0e07597
              4fcd223
              1302ecd
              2084be0
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -2,6 +2,7 @@ name: Nightly and release tests on main/release branch | |
|  | ||
| on: | ||
| workflow_dispatch: | ||
| pull_request: | ||
| schedule: | ||
| - cron: "0 0 * * *" # every day at midnight | ||
|  | ||
|  | @@ -18,6 +19,7 @@ env: | |
|  | ||
| jobs: | ||
| setup_torch_cuda_pipeline_matrix: | ||
| if: github.event_name == 'schedule' | ||
|          | ||
| name: Setup Torch Pipelines CUDA Slow Tests Matrix | ||
| runs-on: | ||
| group: aws-general-8-plus | ||
|  | @@ -49,6 +51,7 @@ jobs: | |
| path: reports | ||
|  | ||
| run_nightly_tests_for_torch_pipelines: | ||
| if: github.event_name == 'schedule' | ||
| name: Nightly Torch Pipelines CUDA Tests | ||
| needs: setup_torch_cuda_pipeline_matrix | ||
| strategy: | ||
|  | @@ -106,6 +109,7 @@ jobs: | |
| python utils/log_reports.py >> $GITHUB_STEP_SUMMARY | ||
|  | ||
| run_nightly_tests_for_other_torch_modules: | ||
| if: github.event_name == 'schedule' | ||
| name: Nightly Torch CUDA Tests | ||
| runs-on: | ||
| group: aws-g4dn-2xlarge | ||
|  | @@ -180,6 +184,62 @@ jobs: | |
| pip install slack_sdk tabulate | ||
| python utils/log_reports.py >> $GITHUB_STEP_SUMMARY | ||
|  | ||
| run_big_gpu_torch_tests: | ||
| name: Torch tests on big GPU | ||
| strategy: | ||
| fail-fast: false | ||
| max-parallel: 2 | ||
| runs-on: | ||
| group: aws-g6e-xlarge-plus | ||
| container: | ||
| image: diffusers/diffusers-pytorch-cuda | ||
| options: --shm-size "16gb" --ipc host --gpus 0 | ||
| steps: | ||
| - name: Checkout diffusers | ||
| uses: actions/checkout@v3 | ||
| with: | ||
| fetch-depth: 2 | ||
| - name: NVIDIA-SMI | ||
| run: nvidia-smi | ||
| - name: Install dependencies | ||
| run: | | ||
| python -m venv /opt/venv && export PATH="/opt/venv/bin:$PATH" | ||
| python -m uv pip install -e [quality,test] | ||
| python -m uv pip install peft@git+https://github.com/huggingface/peft.git | ||
| pip uninstall accelerate -y && python -m uv pip install -U accelerate@git+https://github.com/huggingface/accelerate.git | ||
| python -m uv pip install pytest-reportlog | ||
| - name: Environment | ||
| run: | | ||
| python utils/print_env.py | ||
| - name: Selected Torch CUDA Test on big GPU | ||
| env: | ||
| HF_TOKEN: ${{ secrets.DIFFUSERS_HF_HUB_READ_TOKEN }} | ||
| # https://pytorch.org/docs/stable/notes/randomness.html#avoiding-nondeterministic-algorithms | ||
| CUBLAS_WORKSPACE_CONFIG: :16:8 | ||
| BIG_GPU_MEMORY: 40 | ||
| run: | | ||
| python -m pytest -n 1 --max-worker-restart=0 --dist=loadfile \ | ||
| -m "big_gpu_with_torch_cuda" \ | ||
| --make-reports=tests_big_gpu_torch_cuda \ | ||
| --report-log=tests_big_gpu_torch_cuda.log \ | ||
| tests/ | ||
| - name: Failure short reports | ||
| if: ${{ failure() }} | ||
| run: | | ||
| cat reports/tests_big_gpu_torch_cuda_stats.txt | ||
| cat reports/tests_big_gpu_torch_cuda_failures_short.txt | ||
| - name: Test suite reports artifacts | ||
| if: ${{ always() }} | ||
| uses: actions/upload-artifact@v4 | ||
| with: | ||
| name: torch_cuda_big_gpu_test_reports | ||
| path: reports | ||
| - name: Generate Report and Notify Channel | ||
| if: always() | ||
| run: | | ||
| pip install slack_sdk tabulate | ||
| python utils/log_reports.py >> $GITHUB_STEP_SUMMARY | ||
|  | ||
| run_flax_tpu_tests: | ||
| name: Nightly Flax TPU Tests | ||
| runs-on: docker-tpu | ||
|  | @@ -237,6 +297,7 @@ jobs: | |
| python utils/log_reports.py >> $GITHUB_STEP_SUMMARY | ||
|  | ||
| run_nightly_onnx_tests: | ||
| if: github.event_name == 'schedule' | ||
| name: Nightly ONNXRuntime CUDA tests on Ubuntu | ||
| runs-on: | ||
| group: aws-g4dn-2xlarge | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -1,4 +1,3 @@ | ||
| import gc | ||
| import unittest | ||
|  | ||
| import numpy as np | ||
|  | @@ -13,9 +12,6 @@ | |
| FluxTransformer2DModel, | ||
| ) | ||
| from diffusers.utils.testing_utils import ( | ||
| numpy_cosine_similarity_distance, | ||
| require_torch_gpu, | ||
| slow, | ||
| torch_device, | ||
| ) | ||
|  | ||
|  | @@ -222,70 +218,3 @@ def test_fused_qkv_projections(self): | |
| assert np.allclose( | ||
| original_image_slice, image_slice_disabled, atol=1e-2, rtol=1e-2 | ||
| ), "Original outputs should match when fused QKV projections are disabled." | ||
|  | ||
|  | ||
| @slow | ||
| @require_torch_gpu | ||
| class FluxControlNetImg2ImgPipelineSlowTests(unittest.TestCase): | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this test was correctly done as it doesn't pass the […] |
| pipeline_class = FluxControlNetImg2ImgPipeline | ||
| repo_id = "black-forest-labs/FLUX.1-schnell" | ||
|  | ||
| def setUp(self): | ||
| super().setUp() | ||
| gc.collect() | ||
| torch.cuda.empty_cache() | ||
|  | ||
| def tearDown(self): | ||
| super().tearDown() | ||
| gc.collect() | ||
| torch.cuda.empty_cache() | ||
|  | ||
| def get_inputs(self, device, seed=0): | ||
| if str(device).startswith("mps"): | ||
| generator = torch.manual_seed(seed) | ||
| else: | ||
| generator = torch.Generator(device="cpu").manual_seed(seed) | ||
|  | ||
| image = torch.randn(1, 3, 64, 64).to(device) | ||
| control_image = torch.randn(1, 3, 64, 64).to(device) | ||
|  | ||
| return { | ||
| "prompt": "A photo of a cat", | ||
| "image": image, | ||
| "control_image": control_image, | ||
| "num_inference_steps": 2, | ||
| "guidance_scale": 5.0, | ||
| "controlnet_conditioning_scale": 1.0, | ||
| "strength": 0.8, | ||
| "output_type": "np", | ||
| "generator": generator, | ||
| } | ||
|  | ||
| @unittest.skip("We cannot run inference on this model with the current CI hardware") | ||
| def test_flux_controlnet_img2img_inference(self): | ||
| pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.bfloat16) | ||
| pipe.enable_model_cpu_offload() | ||
|  | ||
| inputs = self.get_inputs(torch_device) | ||
|  | ||
| image = pipe(**inputs).images[0] | ||
| image_slice = image[0, :10, :10] | ||
| expected_slice = np.array( | ||
| [ | ||
| [0.36132812, 0.30004883, 0.25830078], | ||
| [0.36669922, 0.31103516, 0.23754883], | ||
| [0.34814453, 0.29248047, 0.23583984], | ||
| [0.35791016, 0.30981445, 0.23999023], | ||
| [0.36328125, 0.31274414, 0.2607422], | ||
| [0.37304688, 0.32177734, 0.26171875], | ||
| [0.3671875, 0.31933594, 0.25756836], | ||
| [0.36035156, 0.31103516, 0.2578125], | ||
| [0.3857422, 0.33789062, 0.27563477], | ||
| [0.3701172, 0.31982422, 0.265625], | ||
| ], | ||
| dtype=np.float32, | ||
| ) | ||
|  | ||
| max_diff = numpy_cosine_similarity_distance(expected_slice.flatten(), image_slice.flatten()) | ||
|  | ||
| assert max_diff < 1e-4 | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is temporary.