huggingface · stevhliu · Mar 20, 2025 · Mar 19, 2025 · Mar 20, 2025 · Mar 20, 2025
diff --git a/diffusers/composable_stable_diffusion.ipynb b/diffusers/composable_stable_diffusion.ipynb
@@ -0,0 +1,155 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Composable Stable Diffusion\n",
+    "\n",
+    "[Composable Stable Diffusion](https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/) proposes conjunction and negation (negative prompts) operators for compositional generation with conditional diffusion models. This script was contributed by [MarkRich](https://github.com/MarkRich) and the notebook by [Parag Ekbote](https://github.com/ParagEkbote)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install into the kernel's environment; transformers is required to load the Stable Diffusion pipeline.\n",
+    "%pip install torch numpy torchvision diffusers transformers"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3dadcf1262e0492cafe9556f62ba3a9f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "composable_stable_diffusion.py:   0%|          | 0.00/27.6k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "279a467d562041ec935edacbf177caba",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "composing ['mystical trees', 'A magical pond', 'dark']...\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3717298308004b648b65d6c1b1e02dbe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/50 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Image saved successfully!\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch as th\n",
+    "import numpy as np\n",
+    "import torchvision.utils as tvu\n",
+    "from diffusers import DiffusionPipeline\n",
+    "import argparse\n",
+    "import sys\n",
+    "\n",
+    "# Jupyter launches the kernel with its own command-line arguments, which argparse\n",
+    "# would reject. Overwrite sys.argv with the options for this run instead.\n",
+    "sys.argv = [\n",
+    "    \"ipykernel_launcher.py\",\n",
+    "    \"--prompt\", \"mystical trees | A magical pond | dark\",\n",
+    "    \"--steps\", \"50\",\n",
+    "    \"--scale\", \"7.5\",\n",
+    "    \"--weights\", \"7.5 | 7.5 | -7.5\",\n",
+    "    \"--seed\", \"2\",\n",
+    "    \"--model_path\", \"CompVis/stable-diffusion-v1-4\",\n",
+    "    \"--num_images\", \"1\"\n",
+    "]\n",
+    "\n",
+    "# Command-line interface of the original script; the defaults mirror the values above.\n",
+    "parser = argparse.ArgumentParser()\n",
+    "parser.add_argument(\"--prompt\", type=str, default=\"mystical trees | A magical pond | dark\",\n",
+    "                    help=\"use '|' as the delimiter to compose separate sentences.\")\n",
+    "parser.add_argument(\"--steps\", type=int, default=50)\n",
+    "parser.add_argument(\"--scale\", type=float, default=7.5)\n",
+    "parser.add_argument(\"--weights\", type=str, default=\"7.5 | 7.5 | -7.5\")\n",
+    "parser.add_argument(\"--seed\", type=int, default=2)\n",
+    "parser.add_argument(\"--model_path\", type=str, default=\"CompVis/stable-diffusion-v1-4\")\n",
+    "parser.add_argument(\"--num_images\", type=int, default=1)\n",
+    "args = parser.parse_args()\n",
+    "\n",
+    "# Run on the GPU when one is available, otherwise fall back to the CPU.\n",
+    "has_cuda = th.cuda.is_available()\n",
+    "device = th.device('cpu' if not has_cuda else 'cuda')\n",
+    "\n",
+    "# Assign parameters\n",
+    "prompt = args.prompt\n",
+    "scale = args.scale\n",
+    "steps = args.steps\n",
+    "\n",
+    "# Load the community pipeline and move it to the selected device.\n",
+    "pipe = DiffusionPipeline.from_pretrained(\n",
+    "    args.model_path,\n",
+    "    custom_pipeline=\"composable_stable_diffusion\",\n",
+    ").to(device)\n",
+    "\n",
+    "# Disable safety checker (if intentional for internal use)\n",
+    "pipe.safety_checker = None\n",
+    "\n",
+    "# Seed a generator on the same device as the pipeline. Hard-coding \"cuda\" here\n",
+    "# raises on CPU-only machines even though the pipeline itself fell back to CPU.\n",
+    "generator = th.Generator(device=device).manual_seed(args.seed)\n",
+    "\n",
+    "# Generate images; each one is converted to a CHW float tensor scaled by 1/255.\n",
+    "images = []\n",
+    "for i in range(args.num_images):\n",
+    "    image = pipe(prompt, guidance_scale=scale, num_inference_steps=steps,\n",
+    "                 weights=args.weights, generator=generator).images[0]\n",
+    "    images.append(th.from_numpy(np.array(image)).permute(2, 0, 1) / 255.)\n",
+    "\n",
+    "# Create and save image grid.\n",
+    "# NOTE(review): the file name embeds the raw prompt and weights, including the '|'\n",
+    "# separators, which some filesystems (e.g. Windows) do not accept - confirm.\n",
+    "grid = tvu.make_grid(th.stack(images, dim=0), nrow=4, padding=0)\n",
+    "tvu.save_image(grid, f'{prompt}_{args.weights}.png')\n",
+    "\n",
+    "print(\"Image saved successfully!\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/diffusers/image_to_image_inpainting_stable_diffusion.ipynb b/diffusers/image_to_image_inpainting_stable_diffusion.ipynb
@@ -0,0 +1,123 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Image to Image Inpainting Stable Diffusion\n",
+    "\n",
+    "Similar to the standard stable diffusion inpainting example, except with the addition of an `inner_image` argument.\n",
+    "\n",
+    "`image`, `inner_image`, and `mask` should have the same dimensions. `inner_image` should have an alpha (transparency) channel.\n",
+    "\n",
+    "The aim is to overlay two images, then mask out the boundary between `image` and `inner_image` to allow stable diffusion to make the connection more seamless. For example, this could be used to place a logo on a shirt and make it blend seamlessly. This script was contributed by [Alex McKinney](https://github.com/vvvm23) and the notebook by [Parag Ekbote](https://github.com/ParagEkbote)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install into the kernel's environment; transformers is required to load the pipeline,\n",
+    "# and requests / pillow are imported directly by the next cell.\n",
+    "%pip install diffusers torch transformers requests pillow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0b56a945eb5145598c4fd153bc658786",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "An error occurred while trying to fetch /home/zeus/.cache/huggingface/hub/models--stable-diffusion-v1-5--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet: Error no file named diffusion_pytorch_model.safetensors found in directory /home/zeus/.cache/huggingface/hub/models--stable-diffusion-v1-5--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/unet.\n",
+      "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "An error occurred while trying to fetch /home/zeus/.cache/huggingface/hub/models--stable-diffusion-v1-5--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae: Error no file named diffusion_pytorch_model.safetensors found in directory /home/zeus/.cache/huggingface/hub/models--stable-diffusion-v1-5--stable-diffusion-inpainting/snapshots/8a4288a76071f7280aedbdb3253bdb9e9d5d84bb/vae.\n",
+      "Defaulting to unsafe serialization. Pass `allow_pickle=False` to raise an error instead.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cfa34d5822784b449f9014f3f1b0e4ef",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/50 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import requests\n",
+    "from PIL import Image\n",
+    "from io import BytesIO\n",
+    "from diffusers import DiffusionPipeline\n",
+    "\n",
+    "# Remote example images used as inputs to the pipeline.\n",
+    "# NOTE(review): inner_image_url and mask_url point at the same file - confirm\n",
+    "# whether a distinct inner image (with an alpha channel) was intended.\n",
+    "image_url = \"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png\"\n",
+    "inner_image_url = \"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png\"\n",
+    "mask_url = \"https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png\"\n",
+    "\n",
+    "def load_image(url, mode=\"RGB\", size=(512, 512), timeout=30):\n",
+    "    \"\"\"Download an image from a URL and return it as a PIL image.\n",
+    "\n",
+    "    Args:\n",
+    "        url: Address of the image to download.\n",
+    "        mode: PIL mode the image is converted to (for example \"RGB\" or \"RGBA\").\n",
+    "        size: (width, height) the image is resized to.\n",
+    "        timeout: Seconds to wait for the server before giving up, so that an\n",
+    "            unresponsive host cannot hang the notebook indefinitely.\n",
+    "\n",
+    "    Returns:\n",
+    "        The downloaded image, converted to `mode` and resized to `size`.\n",
+    "\n",
+    "    Raises:\n",
+    "        FileNotFoundError: If the server does not answer with HTTP status 200.\n",
+    "    \"\"\"\n",
+    "    response = requests.get(url, timeout=timeout)\n",
+    "    if response.status_code != 200:\n",
+    "        raise FileNotFoundError(f\"Could not retrieve image from {url}\")\n",
+    "    return Image.open(BytesIO(response.content)).convert(mode).resize(size)\n",
+    "\n",
+    "# Load the three pipeline inputs; the inner image is loaded as RGBA.\n",
+    "init_image = load_image(image_url, mode=\"RGB\")\n",
+    "inner_image = load_image(inner_image_url, mode=\"RGBA\")\n",
+    "mask_image = load_image(mask_url, mode=\"RGB\")\n",
+    "\n",
+    "# Use the GPU with half precision when one is available; otherwise fall back to the\n",
+    "# CPU in full precision, because a hard-coded \"cuda\" fails on machines without a GPU.\n",
+    "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
+    "torch_dtype = torch.float16 if device == \"cuda\" else torch.float32\n",
+    "\n",
+    "# Load the pipeline\n",
+    "pipe = DiffusionPipeline.from_pretrained(\n",
+    "    \"stable-diffusion-v1-5/stable-diffusion-inpainting\",\n",
+    "    custom_pipeline=\"img2img_inpainting\",\n",
+    "    torch_dtype=torch_dtype,\n",
+    ")\n",
+    "pipe = pipe.to(device)\n",
+    "\n",
+    "# Inpainting: run the pipeline on the prepared inputs and keep the first result.\n",
+    "prompt = \"a mecha robot sitting on a bench\"\n",
+    "image = pipe(prompt=prompt, image=init_image, inner_image=inner_image, mask_image=mask_image).images[0]\n",
+    "\n",
+    "image.save(\"output.png\")\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}