From 29380ac511291be86f01aed8fbdc8a9e927e8043 Mon Sep 17 00:00:00 2001 From: serial-lens Date: Sat, 10 Sep 2022 13:18:37 -0700 Subject: [PATCH 1/2] Adding support for .bin files from huggingface concepts --- ldm/modules/embedding_manager.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/ldm/modules/embedding_manager.py b/ldm/modules/embedding_manager.py index 128b575e9bb..b579bcd885b 100644 --- a/ldm/modules/embedding_manager.py +++ b/ldm/modules/embedding_manager.py @@ -24,9 +24,9 @@ def get_clip_token_for_string(tokenizer, string): return_tensors='pt', ) tokens = batch_encoding['input_ids'] - assert ( + """ assert ( torch.count_nonzero(tokens - 49407) == 2 - ), f"String '{string}' maps to more than a single token. Please use another string" + ), f"String '{string}' maps to more than a single token. Please use another string" """ return tokens[0, 1] @@ -57,8 +57,9 @@ def __init__( ): super().__init__() - self.string_to_token_dict = {} + self.embedder = embedder + self.string_to_token_dict = {} self.string_to_param_dict = nn.ParameterDict() self.initial_embeddings = ( @@ -217,12 +218,28 @@ def save(self, ckpt_path): def load(self, ckpt_path, full=True): ckpt = torch.load(ckpt_path, map_location='cpu') - self.string_to_token_dict = ckpt["string_to_token"] - self.string_to_param_dict = ckpt["string_to_param"] + + # Handle .pt textual inversion files + if 'string_to_token' in ckpt and 'string_to_param' in ckpt: + self.string_to_token_dict = ckpt["string_to_token"] + self.string_to_param_dict = ckpt["string_to_param"] + + # Handle .bin textual inversion files from Huggingface Concepts + # https://huggingface.co/sd-concepts-library + else: + for token_str in list(ckpt.keys()): + token = get_clip_token_for_string(self.embedder.tokenizer, token_str) + self.string_to_token_dict[token_str] = token + ckpt[token_str] = torch.nn.Parameter(ckpt[token_str]) + + self.string_to_param_dict.update(ckpt) + if not full: for key, value in 
self.string_to_param_dict.items(): self.string_to_param_dict[key] = torch.nn.Parameter(value.half()) + print(f'Added terms: {", ".join(self.string_to_param_dict.keys())}') + def get_embedding_norms_squared(self): all_params = torch.cat( list(self.string_to_param_dict.values()), axis=0 From 12781c78d8b128de397374f9c344b5ac756102f0 Mon Sep 17 00:00:00 2001 From: chromaticist Date: Sun, 11 Sep 2022 11:44:22 -0700 Subject: [PATCH 2/2] Updating documentation to include huggingface .bin info --- docs/features/TEXTUAL_INVERSION.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/features/TEXTUAL_INVERSION.md b/docs/features/TEXTUAL_INVERSION.md index 0bf13980c83..1cc18f25152 100644 --- a/docs/features/TEXTUAL_INVERSION.md +++ b/docs/features/TEXTUAL_INVERSION.md @@ -1,6 +1,8 @@ # **Personalizing Text-to-Image Generation** -You may personalize the generated images to provide your own styles or objects by training a new LDM checkpoint and introducing a new vocabulary to the fixed model. +You may personalize the generated images to provide your own styles or objects by training a new LDM checkpoint and introducing a new vocabulary to the fixed model as a (.pt) embeddings file. Alternatively, you may use or train HuggingFace Concepts embeddings files (.bin) from https://huggingface.co/sd-concepts-library and its associated notebooks. + +**Training** To train, prepare a folder that contains images sized at 512x512 and execute the following: @@ -26,9 +28,11 @@ On a RTX3090, the process for SD will take ~1h @1.6 iterations/sec. _Note_: According to the associated paper, the optimal number of images is 3-5. Your model may not converge if you use more images than that. -Training will run indefinately, but you may wish to stop it before the heat death of the universe, when you find a low loss epoch or around ~5000 iterations. 
+Training will run indefinitely, but you may wish to stop it before the heat death of the universe, when you find a low-loss epoch or after roughly 5000 iterations. + +**Running** -Once the model is trained, specify the trained .pt file when starting dream using +Once the model is trained, specify the trained .pt or .bin file when starting dream using: ``` (ldm) ~/stable-diffusion$ python3 ./scripts/dream.py --embedding_path /path/to/embedding.pt --full_precision @@ -46,7 +50,7 @@ This also works with image2image dream> "waterfall and rainbow in the style of *" --init_img=./init-images/crude_drawing.png --strength=0.5 -s100 -n4 ``` -It's also possible to train multiple token (modify the placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine LDM checkpoints using: +For .pt files, it's also possible to train multiple tokens (modify the placeholder string in `configs/stable-diffusion/v1-finetune.yaml`) and combine LDM checkpoints using: ``` (ldm) ~/stable-diffusion$ python3 ./scripts/merge_embeddings.py \