huggingface · yiyixuxu · Feb 13, 2024 · Feb 12, 2024 · Feb 12, 2024 · Feb 12, 2024
diff --git a/src/diffusers/loaders/autoencoder.py b/src/diffusers/loaders/autoencoder.py
@@ -92,6 +92,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
         """
 
         original_config_file = kwargs.pop("original_config_file", None)
+        config_file = kwargs.pop("config_file", None)
         resume_download = kwargs.pop("resume_download", False)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
@@ -103,6 +104,13 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
         use_safetensors = kwargs.pop("use_safetensors", True)
 
         class_name = cls.__name__
+
+        if (config_file is not None) and (original_config_file is not None):
+            raise ValueError(
+                "You cannot pass both `config_file` and `original_config_file` to `from_single_file`. Please use only one of these arguments."
+            )
+
+        original_config_file = original_config_file or config_file
         original_config, checkpoint = fetch_ldm_config_and_checkpoint(
             pretrained_model_link_or_path=pretrained_model_link_or_path,
             class_name=class_name,
@@ -118,7 +126,10 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
         )
 
         image_size = kwargs.pop("image_size", None)
-        component = create_diffusers_vae_model_from_ldm(class_name, original_config, checkpoint, image_size=image_size)
+        scaling_factor = kwargs.pop("scaling_factor", None)
+        component = create_diffusers_vae_model_from_ldm(
+            class_name, original_config, checkpoint, image_size=image_size, scaling_factor=scaling_factor
+        )
         vae = component["vae"]
         if torch_dtype is not None:
             vae = vae.to(torch_dtype)

diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py
@@ -175,6 +175,7 @@
 }
 
 LDM_VAE_KEY = "first_stage_model."
+LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215
 LDM_UNET_KEY = "model.diffusion_model."
 LDM_CONTROLNET_KEY = "control_model."
 LDM_CLIP_PREFIX_TO_REMOVE = ["cond_stage_model.transformer.", "conditioner.embedders.0.transformer."]
@@ -518,7 +519,9 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None
     Creates a config for the diffusers based on the config of the LDM model.
     """
     vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"]
-    scaling_factor = scaling_factor or original_config["model"]["params"]["scale_factor"]
+    # `scale_factor` may be absent from the LDM config; use .get() so the default below is reachable.
+    if not scaling_factor:
+        scaling_factor = original_config["model"]["params"].get("scale_factor") or LDM_VAE_DEFAULT_SCALING_FACTOR
 
     block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
     down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels)
@@ -1173,7 +1176,7 @@ def create_diffusers_unet_model_from_ldm(
 
 
 def create_diffusers_vae_model_from_ldm(
-    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=0.18125
+    pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=None
 ):
     # import here to avoid circular imports
     from ..models import AutoencoderKL