huggingface · patrickvonplaten · Nov 29, 2023 · Aug 21, 2023 · Aug 21, 2023 · Aug 23, 2023
diff --git a/docs/source/en/api/pipelines/text_to_video_zero.md b/docs/source/en/api/pipelines/text_to_video_zero.md
@@ -99,6 +99,19 @@ imageio.mimsave("video.mp4", result, fps=4)
 ```
 
 
+#### SDXL Support
+To use the SDXL model when generating a video from a prompt, use the `TextToVideoZeroSDXLPipeline` pipeline:
+
+```python
+import torch
+from diffusers import TextToVideoZeroSDXLPipeline
+
+model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+pipe = TextToVideoZeroSDXLPipeline.from_pretrained(
+    model_id, torch_dtype=torch.float16, variant="fp16", use_safetensors=True
+).to("cuda")
+```
+
 ### Text-To-Video with Pose Control
 To generate a video from prompt with additional pose control
 
@@ -257,4 +270,4 @@ You can filter out some available DreamBooth-trained models with [this link](htt
 	- __call__
 
 ## TextToVideoPipelineOutput
-[[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput
+[[autodoc]] pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoPipelineOutput
diff --git a/src/diffusers/__init__.py b/src/diffusers/__init__.py
@@ -202,6 +202,7 @@
         StableUnCLIPPipeline,
         TextToVideoSDPipeline,
         TextToVideoZeroPipeline,
+        TextToVideoZeroSDXLPipeline,
         UnCLIPImageVariationPipeline,
         UnCLIPPipeline,
         UniDiffuserModel,

diff --git a/src/diffusers/pipelines/__init__.py b/src/diffusers/pipelines/__init__.py
@@ -119,7 +119,12 @@
         StableDiffusionXLPipeline,
     )
     from .t2i_adapter import StableDiffusionAdapterPipeline, StableDiffusionXLAdapterPipeline
-    from .text_to_video_synthesis import TextToVideoSDPipeline, TextToVideoZeroPipeline, VideoToVideoSDPipeline
+    from .text_to_video_synthesis import (
+        TextToVideoSDPipeline,
+        TextToVideoZeroPipeline,
+        TextToVideoZeroSDXLPipeline,
+        VideoToVideoSDPipeline,
+    )
     from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline
     from .unidiffuser import ImageTextPipelineOutput, UniDiffuserModel, UniDiffuserPipeline, UniDiffuserTextDecoder
     from .versatile_diffusion import (

diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
@@ -425,10 +425,8 @@ def run_safety_checker(self, image, device, dtype):
 
     def decode_latents(self, latents):
         warnings.warn(
-            (
-                "The decode_latents method is deprecated and will be removed in a future version. Please"
-                " use VaeImageProcessor instead"
-            ),
+            "The decode_latents method is deprecated and will be removed in a future version. Please"
+            " use VaeImageProcessor instead",
             FutureWarning,
         )
         latents = 1 / self.vae.config.scaling_factor * latents

diff --git a/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py b/src/diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
@@ -426,10 +426,8 @@ def run_safety_checker(self, image, device, dtype):
 
     def decode_latents(self, latents):
         warnings.warn(
-            (
-                "The decode_latents method is deprecated and will be removed in a future version. Please"
-                " use VaeImageProcessor instead"
-            ),
+            "The decode_latents method is deprecated and will be removed in a future version. Please"
+            " use VaeImageProcessor instead",
             FutureWarning,
         )
         latents = 1 / self.vae.config.scaling_factor * latents

diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py
@@ -35,10 +35,8 @@
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.preprocess with 8->64
 def preprocess(image):
     warnings.warn(
-        (
-            "The preprocess method is deprecated and will be removed in a future version. Please"
-            " use VaeImageProcessor.preprocess instead"
-        ),
+        "The preprocess method is deprecated and will be removed in a future version. Please"
+        " use VaeImageProcessor.preprocess instead",
         FutureWarning,
     )
     if isinstance(image, torch.Tensor):

diff --git a/src/diffusers/pipelines/text_to_video_synthesis/__init__.py b/src/diffusers/pipelines/text_to_video_synthesis/__init__.py
@@ -30,3 +30,4 @@ class TextToVideoSDPipelineOutput(BaseOutput):
     from .pipeline_text_to_video_synth import TextToVideoSDPipeline
     from .pipeline_text_to_video_synth_img2img import VideoToVideoSDPipeline  # noqa: F401
     from .pipeline_text_to_video_zero import TextToVideoZeroPipeline
+    from .pipeline_text_to_video_zero_sdxl import TextToVideoZeroSDXLPipeline