Commit 3a2be62

Merge branch 'development' into main

2 parents: 91e826e + 5116c81

File tree: 10 files changed, +86 −30 lines


README-Mac-MPS.md

Lines changed: 17 additions & 0 deletions

@@ -320,3 +320,20 @@ something that depends on it-- Rosetta can translate some Intel instructions but
 not the specialized ones here. To avoid this, make sure to use the environment
 variable `CONDA_SUBDIR=osx-arm64`, which restricts the Conda environment to only
 use ARM packages, and use `nomkl` as described above.
+
+### input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
+
+May appear when just starting to generate, e.g.:
+
+```
+dream> clouds
+Generating: 0%| | 0/1 [00:00<?, ?it/s]/Users/[...]/dev/stable-diffusion/ldm/modules/embedding_manager.py:152: UserWarning: The operator 'aten::nonzero' is not currently supported on the MPS backend and will fall back to run on the CPU. This may have performance implications. (Triggered internally at /Users/runner/work/_temp/anaconda/conda-bld/pytorch_1662016319283/work/aten/src/ATen/mps/MPSFallback.mm:11.)
+  placeholder_idx = torch.where(
+loc("mps_add"("(mpsFileLoc): /AppleInternal/Library/BuildRoots/20d6c351-ee94-11ec-bcaf-7247572f23b4/Library/Caches/com.apple.xbs/Sources/MetalPerformanceShadersGraph/mpsgraph/MetalPerformanceShadersGraph/Core/Files/MPSGraphUtilities.mm":219:0)): error: input types 'tensor<2x1280xf32>' and 'tensor<*xf16>' are not broadcast compatible
+LLVM ERROR: Failed to infer result type(s).
+Abort trap: 6
+/Users/[...]/opt/anaconda3/envs/ldm/lib/python3.9/multiprocessing/resource_tracker.py:216: UserWarning: resource_tracker: There appear to be 1 leaked semaphore objects to clean up at shutdown
+  warnings.warn('resource_tracker: There appear to be %d '
+```
+
+Macs do not support autocast/mixed precision. Supply `--full_precision` to use float32 everywhere.
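The "fp32 vs. fp16 not broadcast compatible" failure is exactly what mixing half- and full-precision tensors on MPS produces, which is why `--full_precision` is the fix. As a torch-free sketch of the decision rule (the function name `resolve_precision` is mine, not from the repo, which hard-codes the equivalent check in its `T2I` constructor):

```python
def resolve_precision(device_type: str, full_precision: bool) -> str:
    """Pick a sampling precision for a given device.

    MPS has no autocast/mixed-precision support, so any float16 tensor
    that meets a float32 one triggers the broadcast error above;
    float32 everywhere is the only safe choice on Macs.
    """
    if device_type == 'mps':
        return 'float32'
    return 'float32' if full_precision else 'autocast'
```

On CUDA the flag is a genuine choice; on MPS it is forced regardless of what the user passed.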

README.md

Lines changed: 12 additions & 1 deletion

@@ -138,6 +138,13 @@ You may also pass a -v<count> option to generate count variants on the original
 passing the first generated image back into img2img the requested number of times. It generates interesting
 variants.

+## Seamless Tiling
+
+The seamless tiling mode causes generated images to tile seamlessly. To use it, add the --seamless option when starting the script, which makes every generated image tile, or add it to an individual dream> prompt as shown here:
+```
+dream> "pond garden with lotus by claude monet" --seamless -s100 -n4
+```
+
 ## GFPGAN and Real-ESRGAN Support

 The script also provides the ability to do face restoration and

@@ -400,7 +407,11 @@ repository and associated paper for details and limitations.

 # Latest Changes

-- v1.13 (3 September 2022)
+- v1.14 (In progress)
+
+- Add "seamless mode" for circular tiling of image. Generates beautiful effects. ([prixt](https://github.com/prixt))
+
+- v1.13 (3 September 2022)

 - Support image variations (see [VARIATIONS](VARIATIONS.md)) ([Kevin Gibbons](https://github.com/bakkot) and many contributors and reviewers)
 - Supports a Google Colab notebook for a standalone server running on Google hardware ([Arturo Mendivil](https://github.com/artmen1516))
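What makes the output tile is circular padding: the `ldm/simplet2i.py` changes later in this diff switch every convolution's `padding_mode` to `'circular'`, so pixels at one edge are computed using pixels from the opposite edge. A minimal, framework-free illustration of circular versus ordinary zero padding on a one-dimensional row of pixels (the helper names are mine, for illustration only):

```python
def zero_pad(row, pad):
    # ordinary padding: edges are surrounded by zeros, so the two
    # sides of the image never influence each other
    return [0] * pad + row + [0] * pad

def circular_pad(row, pad):
    # circular padding: values wrap around, so a convolution at the
    # right edge "sees" the left edge -- this is what removes the
    # visible seam when the image is tiled
    return row[-pad:] + row + row[:pad]

zero_pad([1, 2, 3, 4], 1)      # [0, 1, 2, 3, 4, 0]
circular_pad([1, 2, 3, 4], 1)  # [4, 1, 2, 3, 4, 1]
```

The same wrap-around happens in two dimensions inside each `Conv2d`, which is why the left/right and top/bottom borders of a seamless image line up.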

VARIATIONS.md

Lines changed: 1 addition & 1 deletion

@@ -108,6 +108,6 @@ the chosen two images. Here's the one I like best:

 <img src="static/variation_walkthru/000004.3747154981.png">

-As you can see, this is a very powerful too, which when combined with
+As you can see, this is a very powerful tool, which when combined with
 subprompt weighting, gives you great control over the content and
 quality of your generated images.

environment-mac.yaml

Lines changed: 12 additions & 18 deletions

@@ -1,33 +1,29 @@
 name: ldm
 channels:
-  - pytorch-nightly
+  - pytorch
   - conda-forge
 dependencies:
-  - python==3.9.13
+  - python==3.10.5
   - pip==22.2.2

-  # pytorch-nightly, left unpinned
+  # pytorch left unpinned
   - pytorch
-  - torchmetrics
   - torchvision

   # I suggest to keep the other deps sorted for convenience.
-  # If you wish to upgrade to 3.10, try to run this:
+  # To determine what the latest versions should be, run:
   #
   # ```shell
-  # CONDA_CMD=conda
-  # sed -E 's/python==3.9.13/python==3.10.5/;s/ldm/ldm-3.10/;21,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > /tmp/environment-mac-updated.yml
-  # CONDA_SUBDIR=osx-arm64 $CONDA_CMD env create -f /tmp/environment-mac-updated.yml && $CONDA_CMD list -n ldm-3.10 | awk ' {print "  - " $1 "==" $2;} '
+  # sed -E 's/ldm/ldm-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yaml > environment-mac-updated.yml
+  # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n ldm-updated | awk ' {print "  - " $1 "==" $2;} '
   # ```
-  #
-  # Unfortunately, as of 2022-08-31, this fails at the pip stage.
   - albumentations==1.2.1
   - coloredlogs==15.0.1
   - einops==0.4.1
   - grpcio==1.46.4
-  - humanfriendly
-  - imageio-ffmpeg==0.4.7
+  - humanfriendly==10.0
   - imageio==2.21.2
+  - imageio-ffmpeg==0.4.7
   - imgaug==0.4.0
   - kornia==0.6.7
   - mpmath==1.2.1

@@ -43,13 +39,11 @@ dependencies:
   - streamlit==1.12.2
   - sympy==1.10.1
   - tensorboard==2.9.0
-  - transformers==4.21.2
+  - torchmetrics==0.9.3
   - pip:
-    - invisible-watermark
-    - test-tube
-    - tokenizers
-    - torch-fidelity
-    - -e git+https://github.com/huggingface/[email protected]#egg=diffusers
+    - test-tube==0.7.5
+    - transformers==4.21.2
+    - torch-fidelity==0.3.0
     - -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
     - -e git+https://github.com/openai/CLIP.git@main#egg=clip
     - -e git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion

ldm/dream/pngwriter.py

Lines changed: 2 additions & 0 deletions

@@ -59,6 +59,8 @@ def normalize_prompt(self):
         switches.append(f'-H{opt.height or t2i.height}')
         switches.append(f'-C{opt.cfg_scale or t2i.cfg_scale}')
         switches.append(f'-A{opt.sampler_name or t2i.sampler_name}')
+        if opt.seamless or t2i.seamless:
+            switches.append('--seamless')
         if opt.init_img:
             switches.append(f'-I{opt.init_img}')
         if opt.fit:
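The `normalize_prompt` change keeps prompt reproducibility intact: a flag set either on the individual prompt or as a session-wide default is written back into the command line recorded with the image. A simplified sketch of that either-or pattern (the `Opts` class and `normalize_switches` are stand-ins I made up, not the real `opt`/`t2i` objects):

```python
class Opts:
    """Stand-in for per-prompt options or session defaults."""
    def __init__(self, seamless=False, cfg_scale=None):
        self.seamless = seamless
        self.cfg_scale = cfg_scale

def normalize_switches(opt, defaults):
    switches = []
    # valued options: the per-prompt value wins, else the session default
    switches.append(f'-C{opt.cfg_scale or defaults.cfg_scale}')
    # boolean flags: recorded if set in either place
    if opt.seamless or defaults.seamless:
        switches.append('--seamless')
    return switches

normalize_switches(Opts(seamless=True), Opts(cfg_scale=7.5))
# ['-C7.5', '--seamless']
```

Because the recorded switch list is what `dream.py` replays to regenerate an image, any new generation option also needs a line here, exactly as the diff adds for `--seamless`.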

ldm/dream/server.py

Lines changed: 5 additions & 2 deletions

@@ -76,7 +76,8 @@ def do_POST(self):
             steps = int(post_data['steps'])
             width = int(post_data['width'])
             height = int(post_data['height'])
-            fit = 'fit' in post_data
+            fit = 'fit' in post_data
+            seamless = 'seamless' in post_data
             cfgscale = float(post_data['cfgscale'])
             sampler_name = post_data['sampler']
             gfpgan_strength = float(post_data['gfpgan_strength']) if gfpgan_model_exists else 0

@@ -92,7 +93,7 @@ def do_POST(self):
             # across images generated by each call to prompt2img(), so we define it in
             # the outer scope of image_done()
             config = post_data.copy() # Shallow copy
-            config['initimg'] = ''
+            config['initimg'] = config.pop('initimg_name', '')

             images_generated = 0 # helps keep track of when upscaling is started
             images_upscaled = 0 # helps keep track of when upscaling is completed

@@ -170,6 +171,7 @@ def image_progress(sample, step):
                     gfpgan_strength = gfpgan_strength,
                     upscale = upscale,
                     sampler_name = sampler_name,
+                    seamless = seamless,
                     step_callback=image_progress,
                     image_callback=image_done)
             else:

@@ -191,6 +193,7 @@ def image_progress(sample, step):
                     width = width,
                     height = height,
                     fit = fit,
+                    seamless = seamless,
                     gfpgan_strength=gfpgan_strength,
                     upscale = upscale,
                     step_callback=image_progress,
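The server reads `seamless` the same way it already reads `fit`: an HTML checkbox is only included in the submitted form data when it is checked, so key membership is the boolean. A small sketch of that convention (the plain dict stands in for the server's parsed POST body):

```python
def parse_flags(post_data):
    # an unchecked checkbox is simply absent from the submission,
    # so membership testing doubles as the boolean value
    return {
        'fit': 'fit' in post_data,
        'seamless': 'seamless' in post_data,
    }

parse_flags({'prompt': 'clouds', 'seamless': 'on'})
# {'fit': False, 'seamless': True}
```

This is also why the checkbox added to `index.html` needs no `value` handling on the server side; presence alone carries the signal.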

ldm/simplet2i.py

Lines changed: 16 additions & 5 deletions

@@ -14,6 +14,7 @@
 from tqdm import tqdm, trange
 from itertools import islice
 from einops import rearrange, repeat
+from torch import nn
 from torchvision.utils import make_grid
 from pytorch_lightning import seed_everything
 from torch import autocast

@@ -109,6 +110,7 @@ class T2I:
         downsampling_factor
         precision
         strength
+        seamless
         embedding_path

     The vast majority of these arguments default to reasonable values.

@@ -132,6 +134,7 @@ def __init__(
         precision='autocast',
         full_precision=False,
         strength=0.75, # default in scripts/img2img.py
+        seamless=False,
         embedding_path=None,
         device_type = 'cuda',
         # just to keep track of this parameter when regenerating prompt

@@ -153,6 +156,7 @@ def __init__(
         self.precision = precision
         self.full_precision = True if choose_torch_device() == 'mps' else full_precision
         self.strength = strength
+        self.seamless = seamless
         self.embedding_path = embedding_path
         self.device_type = device_type
         self.model = None # empty for now

@@ -217,6 +221,7 @@ def prompt2image(
         step_callback = None,
         width = None,
         height = None,
+        seamless = False,
         # these are specific to img2img
         init_img = None,
         fit = False,

@@ -240,6 +245,7 @@ def prompt2image(
         width // width of image, in multiples of 64 (512)
         height // height of image, in multiples of 64 (512)
         cfg_scale // how strongly the prompt influences the image (7.5) (must be >1)
+        seamless // whether the generated image should tile
         init_img // path to an initial image - its dimensions override width and height
         strength // strength for noising/unnoising init_img. 0.0 preserves image exactly, 1.0 replaces it completely
         gfpgan_strength // strength for GFPGAN. 0.0 preserves image exactly, 1.0 replaces it completely

@@ -268,6 +274,7 @@ def process_image(image,seed):
         steps = steps or self.steps
         width = width or self.width
         height = height or self.height
+        seamless = seamless or self.seamless
         cfg_scale = cfg_scale or self.cfg_scale
         ddim_eta = ddim_eta or self.ddim_eta
         iterations = iterations or self.iterations

@@ -278,6 +285,10 @@ def process_image(image,seed):
         model = (
             self.load_model()
         ) # will instantiate the model or return it from cache
+        for m in model.modules():
+            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+                m.padding_mode = 'circular' if seamless else m._orig_padding_mode
+
         assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0'
         assert (
             0.0 <= strength <= 1.0

@@ -324,7 +335,6 @@ def process_image(image,seed):
                 self.model.encode_first_stage(init_image)
             ) # move to latent space

-            print(f' DEBUG: seed at make_image time ={seed}')
             make_image = self._img2img(
                 prompt,
                 steps=steps,

@@ -413,10 +423,7 @@ def process_image(image,seed):
                     f'>> Error running RealESRGAN - Your image was not upscaled.\n{e}'
                 )
             if image_callback is not None:
-                if save_original:
-                    image_callback(image, seed)
-                else:
-                    image_callback(image, seed, upscaled=True)
+                image_callback(image, seed, upscaled=True)
             else: # no callback passed, so we simply replace old image with rescaled one
                 result[0] = image

@@ -604,6 +611,10 @@ def load_model(self):

         self._set_sampler()

+        for m in self.model.modules():
+            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+                m._orig_padding_mode = m.padding_mode
+
         return self.model

     # returns a tensor filled with random numbers from a normal distribution
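The two loops in this file form a save/restore pair: `load_model()` stashes each convolution's original `padding_mode` once as `_orig_padding_mode`, and `prompt2image()` then flips the live mode per request, so turning `--seamless` off later restores exactly what the model shipped with. A torch-free sketch of the pattern (the `Conv2d` class here is a minimal stand-in for `torch.nn.Conv2d`, not the real thing):

```python
class Conv2d:
    # stand-in for torch.nn.Conv2d; only the attribute we toggle
    def __init__(self, padding_mode='zeros'):
        self.padding_mode = padding_mode

def remember_padding(modules):
    # done once at model-load time: record each module's shipped mode
    for m in modules:
        m._orig_padding_mode = m.padding_mode

def apply_seamless(modules, seamless):
    # done per request: circular padding wraps edges for tiling,
    # otherwise fall back to whatever was recorded at load time
    for m in modules:
        m.padding_mode = 'circular' if seamless else m._orig_padding_mode

convs = [Conv2d(), Conv2d('reflect')]
remember_padding(convs)
apply_seamless(convs, True)   # both now 'circular'
apply_seamless(convs, False)  # back to 'zeros' and 'reflect'
```

Stashing the original mode matters because not every convolution in a model necessarily uses the same padding; unconditionally restoring `'zeros'` would silently change modules that started out differently.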

scripts/dream.py

Lines changed: 14 additions & 0 deletions

@@ -62,6 +62,7 @@ def main():
         grid = opt.grid,
         # this is solely for recreating the prompt
         latent_diffusion_weights=opt.laion400m,
+        seamless=opt.seamless,
         embedding_path=opt.embedding_path,
         device_type=opt.device
     )

@@ -87,6 +88,9 @@ def main():
         print(f'{e}. Aborting.')
         sys.exit(-1)

+    if opt.seamless:
+        print(">> changed to seamless tiling mode")
+
     # preload the model
     tic = time.time()
     t2i.load_model()

@@ -418,6 +422,11 @@ def create_argv_parser():
         default='outputs/img-samples',
         help='Directory to save generated images and a log of prompts and seeds. Default: outputs/img-samples',
     )
+    parser.add_argument(
+        '--seamless',
+        action='store_true',
+        help='Change the model to seamless tiling (circular) mode',
+    )
     parser.add_argument(
         '--embedding_path',
         type=str,

@@ -540,6 +549,11 @@ def create_cmd_parser():
         default=None,
         help='Directory to save generated images and a log of prompts and seeds',
     )
+    parser.add_argument(
+        '--seamless',
+        action='store_true',
+        help='Change the model to seamless tiling (circular) mode',
+    )
     parser.add_argument(
         '-i',
         '--individual',
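Both parsers register `--seamless` with `action='store_true'`, so the flag takes no argument and defaults to `False` when absent, which is why registering it in the startup parser and in the per-prompt `dream>` parser gives the two usage styles described in the README. A minimal reproduction of that registration:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    '--seamless',
    action='store_true',
    help='Change the model to seamless tiling (circular) mode',
)

print(parser.parse_args([]).seamless)              # False
print(parser.parse_args(['--seamless']).seamless)  # True
```

Because the default is `False`, omitting the flag leaves the per-request `seamless or self.seamless` fallback in `prompt2image()` in control of the session-wide setting.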

static/dream_web/index.html

Lines changed: 4 additions & 2 deletions

@@ -37,6 +37,8 @@ <h2 id="header">Stable Diffusion Dream Server</h2>
             <option value="k_euler_a">KEULER_A</option>
             <option value="k_heun">KHEUN</option>
           </select>
+          <input type="checkbox" name="seamless" id="seamless">
+          <label for="seamless">Seamless circular tiling</label>
           <br>
           <label title="Set to multiple of 64" for="width">Width:</label>
           <select id="width" name="width" value="512">

@@ -64,7 +66,7 @@ <h2 id="header">Stable Diffusion Dream Server</h2>
           <input value="-1" type="number" id="seed" name="seed">
           <button type="button" id="reset-seed">&olarr;</button>
           <input type="checkbox" name="progress_images" id="progress_images">
-          <label for="progress_images">Display in-progress images (slows down generation):</label>
+          <label for="progress_images">Display in-progress images (slower)</label>
           <button type="button" id="reset-all">Reset to Defaults</button>
         </div>
         <div id="img2img">

@@ -74,7 +76,7 @@ <h2 id="header">Stable Diffusion Dream Server</h2>
           <label for="strength">Img2Img Strength:</label>
           <input value="0.75" type="number" id="strength" name="strength" step="0.01" min="0" max="1">
           <input type="checkbox" id="fit" name="fit" checked>
-          <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height:</label>
+          <label title="Rescale image to fit within requested width and height" for="fit">Fit to width/height</label>
         </div>
         <div id="gfpgan">
           <label title="Strength of the gfpgan (face fixing) algorithm." for="gfpgan_strength">GFPGAN Strength (0 to disable):</label>

static/dream_web/index.js

Lines changed: 3 additions & 1 deletion

@@ -19,7 +19,8 @@ function appendOutput(src, seed, config) {
     outputNode.addEventListener('click', () => {
         let form = document.querySelector("#generate-form");
         for (const [k, v] of new FormData(form)) {
-            form.querySelector(`*[name=${k}]`).value = config[k];
+            if (k == 'initimg') { continue; }
+            form.querySelector(`*[name=${k}]`).value = config[k];
         }
         document.querySelector("#seed").value = seed;

@@ -59,6 +60,7 @@ async function generateSubmit(form) {

     // Convert file data to base64
     let formData = Object.fromEntries(new FormData(form));
+    formData.initimg_name = formData.initimg.name;
    formData.initimg = formData.initimg.name !== '' ? await toBase64(formData.initimg) : null;

     let strength = formData.strength;
