@@ -142,6 +142,7 @@ def __call__(
142142 latents : Optional [torch .FloatTensor ] = None ,
143143 output_type : Optional [str ] = "pil" ,
144144 return_dict : bool = True ,
145+ compile_unet : bool = True ,
145146 ** kwargs ,
146147 ):
147148 r"""
@@ -179,6 +180,8 @@ def __call__(
179180 return_dict (`bool`, *optional*, defaults to `True`):
180181 Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
181182 plain tuple.
183+ compile_unet (`bool`, *optional*, defaults to `True`):
184+ Whether or not to compile the UNet as an `nn.Graph` before denoising.
182185
183186 Returns:
184187 [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
@@ -278,15 +281,16 @@ def __call__(
278281
279282 compilation_start = timer ()
280283 compilation_time = 0
281- if self .unet_compiled == False :
282- print ("[oneflow]" , "compiling unet beforehand to make sure the progress bar is more accurate" )
283- i , t = list (enumerate (self .scheduler .timesteps ))[0 ]
284- latent_model_input = torch .cat ([latents ] * 2 ) if do_classifier_free_guidance else latents
285- self .unet_graph ._compile (latent_model_input , t , text_embeddings )
286- self .unet_compiled = True
287- self .unet_graph (latent_model_input , t , text_embeddings ) # warmup
288- compilation_time = timer () - compilation_start
289- print ("[oneflow]" , "[elapsed(s)]" , "[unet compilation]" , compilation_time )
284+ if compile_unet :
285+ if self .unet_compiled == False :
286+ print ("[oneflow]" , "compiling unet beforehand to make sure the progress bar is more accurate" )
287+ i , t = list (enumerate (self .scheduler .timesteps ))[0 ]
288+ latent_model_input = torch .cat ([latents ] * 2 ) if do_classifier_free_guidance else latents
289+ self .unet_graph ._compile (latent_model_input , t , text_embeddings )
290+ self .unet_compiled = True
291+ self .unet_graph (latent_model_input , t , text_embeddings ) # warmup
292+ compilation_time = timer () - compilation_start
293+ print ("[oneflow]" , "[elapsed(s)]" , "[unet compilation]" , compilation_time )
290294
291295 for i , t in enumerate (self .progress_bar (self .scheduler .timesteps )):
292296 torch ._oneflow_internal .profiler .RangePush (f"denoise-{ i } " )
@@ -298,9 +302,12 @@ def __call__(
298302 latent_model_input = latent_model_input / ((sigma ** 2 + 1 ) ** 0.5 )
299303
300304 # predict the noise residual
301- torch ._oneflow_internal .profiler .RangePush (f"denoise-{ i } -unet-graph" )
302- noise_pred = self .unet_graph (latent_model_input , t , text_embeddings )
303- torch ._oneflow_internal .profiler .RangePop ()
305+ if compile_unet :
306+ torch ._oneflow_internal .profiler .RangePush (f"denoise-{ i } -unet-graph" )
307+ noise_pred = self .unet_graph (latent_model_input , t , text_embeddings )
308+ torch ._oneflow_internal .profiler .RangePop ()
309+ else :
310+ noise_pred = self .unet (latent_model_input , t , encoder_hidden_states = text_embeddings ).sample
304311
305312 # perform guidance
306313 if do_classifier_free_guidance :
0 commit comments