Commit 023d436

Author: andres
Commit message: ..

1 parent 42efd70 commit 023d436

File tree

4 files changed: +473 −0 lines changed

controlnet-adapter-inpaint.ipynb

Lines changed: 230 additions & 0 deletions
@@ -0,0 +1,230 @@
# Cell 1: imports
import cv2
import numpy as np
import torch
from controlnet_aux.midas import MidasDetector
from PIL import Image

from diffusers import AutoencoderKL, ControlNetModel, MultiAdapter, T2IAdapter
from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
from diffusers.utils import load_image
from src.diffusers import StableDiffusionXLControlNetAdapterInpaintPipeline
# Cell 2: inpainting with a single depth ControlNet plus a matching depth T2I-Adapter
controlnet_depth = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
adapter_depth = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-depth-midas-sdxl-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)

pipe = StableDiffusionXLControlNetAdapterInpaintPipeline.from_pretrained(
    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
    controlnet=controlnet_depth,
    adapter=adapter_depth,
    vae=vae,
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()
# pipe.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
midas_depth = MidasDetector.from_pretrained(
    "valhalla/t2iadapter-aux-models", filename="dpt_large_384.pt", model_type="dpt_large"
).to("cuda")

prompt = "a tiger sitting on a park bench"
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

image = load_image(img_url).resize((1024, 1024))
mask_image = load_image(mask_url).resize((1024, 1024))

# MiDaS depth map, detected at 512 and upsampled to the SDXL working resolution
depth_image = midas_depth(image, detect_resolution=512, image_resolution=1024)

strength = 0.4

images = pipe(
    prompt,
    image=image,
    mask_image=mask_image,
    control_image=depth_image,
    adapter_image=depth_image,
    num_inference_steps=30,
    controlnet_conditioning_scale=strength,
    adapter_conditioning_scale=strength,
    strength=0.7,  # denoising strength: how strongly the masked region is re-noised
).images
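To sanity-check the run, it helps to view the source, mask, depth conditioning, and result in one strip. A minimal sketch using diffusers' make_image_grid; the preview filename is illustrative:

# source | mask | depth map | inpainted result, side by side
from diffusers.utils import make_image_grid

preview = make_image_grid([image, mask_image, depth_image, images[0]], rows=1, cols=4)
preview.save("tiger_inpaint_preview.jpg", quality=95)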
# Cell 3: stacking two ControlNets (depth + canny) with two matching T2I-Adapters
controlnet_depth = ControlNetModel.from_pretrained(
    "diffusers/controlnet-depth-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
controlnet_canny = ControlNetModel.from_pretrained(
    "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
)
adapter_depth = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-depth-midas-sdxl-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
adapter_canny = T2IAdapter.from_pretrained(
    "TencentARC/t2i-adapter-canny-sdxl-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True
)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, use_safetensors=True)

pipe = StableDiffusionXLControlNetAdapterInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=MultiControlNetModel([controlnet_depth, controlnet_canny]),
    adapter=MultiAdapter([adapter_depth, adapter_canny]),
    vae=vae,
    variant="fp16",
    use_safetensors=True,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")
pipe.enable_xformers_memory_efficient_attention()
# pipe.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
midas_depth = MidasDetector.from_pretrained(
    "valhalla/t2iadapter-aux-models", filename="dpt_large_384.pt", model_type="dpt_large"
).to("cuda")

prompt = "a person sitting on a bench in the park"
img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

image = load_image(img_url).resize((1024, 1024))
mask_image = load_image(mask_url).resize((1024, 1024))

depth_image = midas_depth(image, detect_resolution=512, image_resolution=1024)
# Canny yields a single-channel edge map; convert to RGB for the pipeline
canny_image = Image.fromarray(cv2.Canny(np.array(image), 100, 200)).convert("RGB")

strength = 0.5

images = pipe(
    prompt,
    image=image,  # the inpainting pipeline needs the source image as well as the mask
    mask_image=mask_image,
    control_image=[depth_image, canny_image],
    adapter_image=[depth_image, canny_image],
    num_inference_steps=30,
    controlnet_conditioning_scale=strength,
    adapter_conditioning_scale=strength,
).images
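Both conditioning scales above are scalars applied to depth and canny alike. The stock SDXL MultiControlNet and MultiAdapter pipelines also accept one scale per model; assuming this community pipeline forwards list-valued scales the same way, the two signals can be weighted independently:

# hedged sketch: per-model scales, weighting depth (0.7) over canny (0.3);
# assumes the pipeline forwards lists like the stock SDXL pipelines do
images = pipe(
    prompt,
    image=image,
    mask_image=mask_image,
    control_image=[depth_image, canny_image],
    adapter_image=[depth_image, canny_image],
    num_inference_steps=30,
    controlnet_conditioning_scale=[0.7, 0.3],
    adapter_conditioning_scale=[0.7, 0.3],
).images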
# Cell 4: sweep ControlNet vs. adapter conditioning strength on a new image
url = "https://images.pexels.com/photos/6518723/pexels-photo-6518723.jpeg"
image = load_image(url).resize((1024, 1024))
prompt = "a man and woman sitting on a couch with party hats on. high resolution image"
negative_prompt = "ugly, deformed"

depth_image = midas_depth(image, detect_resolution=512, image_resolution=1024)
# the inpainting pipeline requires a source image and a mask;
# a full-white mask regenerates the entire frame, guided only by depth
mask_image = Image.new("L", image.size, 255)

only_adapter = []
only_control = []
combined = []
strength = np.linspace(0.0, 1.0, 11)
for control_strength in strength:
    for adapter_strength in strength:
        if adapter_strength == 0.0 and control_strength == 0.0:
            continue
        # keep only the axes (one scale at zero) and the diagonal (scales summing
        # to 1); np.isclose avoids float error, e.g. 0.3 + 0.7 != 1.0 in float64
        if not (
            np.isclose(adapter_strength + control_strength, 1.0)
            or adapter_strength == 0.0
            or control_strength == 0.0
        ):
            continue
        print(f"adapter strength: {adapter_strength}, control strength: {control_strength}")
        result = pipe(
            [prompt],
            negative_prompt=[negative_prompt],
            image=image,
            mask_image=mask_image,
            control_image=depth_image,
            adapter_image=depth_image,
            num_inference_steps=30,
            num_images_per_prompt=1,
            controlnet_conditioning_scale=control_strength,
            adapter_conditioning_scale=adapter_strength,
            guidance_scale=7.5,
            generator=torch.Generator().manual_seed(4),
        ).images[0]
        if adapter_strength == 0.0:
            only_control.append(result)
        elif control_strength == 0.0:
            only_adapter.append(result)
        if np.isclose(adapter_strength + control_strength, 1.0):
            combined.append(result)
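The filter admits more runs than is obvious at a glance. A dry run of the same conditions (no pipeline calls) counts what the sweep produces:

# replay the sweep filter without touching the GPU
strength = np.linspace(0.0, 1.0, 11)
kept = [
    (c, a)
    for c in strength
    for a in strength
    if not (a == 0.0 and c == 0.0)
    and (np.isclose(a + c, 1.0) or a == 0.0 or c == 0.0)
]
print(len(kept))                                          # 29 pipeline calls
print(sum(1 for c, a in kept if a == 0.0))                # 10 control-only images
print(sum(1 for c, a in kept if c == 0.0))                # 10 adapter-only images
print(sum(1 for c, a in kept if np.isclose(a + c, 1.0)))  # 11 sum-to-one pairs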
# Cell 5: save one comparison strip per sweep
from diffusers.utils import make_image_grid

make_image_grid(only_control, rows=1, cols=len(only_control)).save("only_control.jpg", quality=95)
make_image_grid(only_adapter, rows=1, cols=len(only_adapter)).save("only_adapter.jpg", quality=95)
make_image_grid(combined, rows=1, cols=len(combined)).save("combined.jpg", quality=95)
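The saved strips carry no per-image annotation. A small hypothetical helper (not part of the notebook) can stamp each combined frame with its scales; combined was filled with control_strength rising from 0.0 to 1.0, so the adapter scale is its complement:

from PIL import ImageDraw

def label(img, text):
    # stamp the scale text in the top-left corner of a copy of the frame
    annotated = img.copy()
    ImageDraw.Draw(annotated).text((10, 10), text, fill="white")
    return annotated

controls = [round(c, 1) for c in np.linspace(0.0, 1.0, 11)]
labeled = [label(img, f"control={c:.1f} adapter={1 - c:.1f}") for img, c in zip(combined, controls)]
make_image_grid(labeled, rows=1, cols=len(labeled)).save("combined_labeled.jpg", quality=95)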
Notebook metadata: Python 3 kernel "hax-cv-7iGZNdAM-py3.10", Python 3.10.10, nbformat 4.2.
