[APPS-SD] Fix a few bugs and bring it up to speed with SD CLI (huggingface#908)

Abhishek-Varma · web-flow · commit 7cfc0fa55b43 · 2023-02-02T07:12:01.000-08:00
diff --git a/.gitignore b/.gitignore
@@ -170,6 +170,5 @@ tank/dict_configs.py
 cache_models/
 onnx_models/
 
-#web logging
-web/logs/
-web/stored_results/stable_diffusion/
+# Generated images
+generated_imgs/
diff --git a/apps/stable_diffusion/scripts/txt2img.py b/apps/stable_diffusion/scripts/txt2img.py
@@ -41,6 +41,12 @@ class Config:
     for vmfb in vmfbs:
         if os.path.exists(vmfb):
             os.remove(vmfb)
+    # Temporary workaround: delete the inference yaml files to incorporate diffusers' pipeline.
+    # TODO: Remove this once we have better weight-update logic.
+    inference_yaml = ["v2-inference-v.yaml", "v1-inference.yaml"]
+    for yaml in inference_yaml:
+        if os.path.exists(yaml):
+            os.remove(yaml)
     home = os.path.expanduser("~")
     if os.name == "nt":  # Windows
         appdata = os.getenv("LOCALAPPDATA")
diff --git a/apps/stable_diffusion/src/models/__init__.py b/apps/stable_diffusion/src/models/__init__.py
@@ -6,4 +6,6 @@
     get_unet,
     get_clip,
     get_tokenizer,
+    get_params,
+    get_variant_version,
 )
diff --git a/apps/stable_diffusion/src/models/model_wrappers.py b/apps/stable_diffusion/src/models/model_wrappers.py
@@ -2,14 +2,15 @@
 from transformers import CLIPTextModel
 from collections import defaultdict
 import torch
-import sys
 import traceback
 import re
+import os, sys, functools, operator
 from apps.stable_diffusion.src.utils import (
     compile_through_fx,
     get_opt_flags,
     base_models,
     args,
+    get_vmfb_path_name,
 )
 
 
@@ -68,6 +69,7 @@ def __init__(
         height: int = 512,
         batch_size: int = 1,
         use_base_vae: bool = False,
+        use_tuned: bool = False,
     ):
         self.check_params(max_len, width, height)
         self.max_len = max_len
@@ -88,13 +90,15 @@ def __init__(
             + "_"
             + precision
         )
+        self.use_tuned = use_tuned
         # We need a better naming convention for the .vmfbs because despite
         # using the custom model variant the .vmfb names remain the same and
         # it'll always pick up the compiled .vmfb instead of compiling the
         # custom model.
         # So, currently, we add `self.model_id` in the `self.model_name` of
         # .vmfb file.
         # TODO: Have a better way of naming the vmfbs using self.model_name.
+        import re
 
         model_name = re.sub(r"\W+", "_", self.model_id)
         if model_name[0] == "_":
@@ -137,6 +141,7 @@ def forward(self, input):
             vae,
             inputs,
             is_f16=is_f16,
+            use_tuned=self.use_tuned,
             model_name=vae_name + self.model_name,
             extra_args=get_opt_flags("vae", precision=self.precision),
         )
@@ -177,6 +182,7 @@ def forward(
             model_name="unet" + self.model_name,
             is_f16=is_f16,
             f16_input_mask=input_mask,
+            use_tuned=self.use_tuned,
             extra_args=get_opt_flags("unet", precision=self.precision),
         )
         return shark_unet
@@ -194,7 +200,6 @@ def forward(self, input):
                 return self.text_encoder(input)[0]
 
         clip_model = CLIPText()
-
         shark_clip = compile_through_fx(
             clip_model,
             tuple(self.inputs["clip"]),
@@ -204,6 +209,11 @@ def forward(self, input):
         return shark_clip
 
     def __call__(self):
+        model_name = ["clip", "base_vae" if self.base_vae else "vae", "unet"]
+        vmfb_path = [
+            get_vmfb_path_name(model + self.model_name)[0]
+            for model in model_name
+        ]
         for model_id in base_models:
             self.inputs = get_input_info(
                 base_models[model_id],
@@ -213,12 +223,22 @@ def __call__(self):
                 self.batch_size,
             )
             try:
-                compiled_clip = self.get_clip()
                 compiled_unet = self.get_unet()
                 compiled_vae = self.get_vae()
+                compiled_clip = self.get_clip()
             except Exception as e:
                 if args.enable_stack_trace:
                     traceback.print_exc()
+                vmfb_present = [os.path.isfile(vmfb) for vmfb in vmfb_path]
+                all_vmfb_present = functools.reduce(
+                    operator.__and__, vmfb_present
+                )
+                # We need to delete vmfbs only if some of the models were compiled.
+                if not all_vmfb_present:
+                    for i in range(len(vmfb_path)):
+                        if vmfb_present[i]:
+                            os.remove(vmfb_path[i])
+                            print("Deleted: ", vmfb_path[i])
                 print("Retrying with a different base model configuration")
                 continue
             # This is done just because in main.py we are basing the choice of tokenizer and scheduler
diff --git a/apps/stable_diffusion/src/models/opt_params.py b/apps/stable_diffusion/src/models/opt_params.py
@@ -14,6 +14,10 @@
 }
 
 
+def get_variant_version(hf_model_id):
+    return hf_model_variant_map[hf_model_id]
+
+
 def get_params(bucket_key, model_key, model, is_tuned, precision):
     iree_flags = []
     if len(args.iree_vulkan_target_triple) > 0:
@@ -60,7 +64,7 @@ def get_params(bucket_key, model_key, model, is_tuned, precision):
 
 
 def get_unet():
-    variant, version = hf_model_variant_map[args.hf_model_id]
+    variant, version = get_variant_version(args.hf_model_id)
     # Tuned model is present only for `fp16` precision.
     is_tuned = "tuned" if args.use_tuned else "untuned"
     if "vulkan" not in args.device and args.use_tuned:
@@ -77,7 +81,7 @@ def get_unet():
 
 
 def get_vae():
-    variant, version = hf_model_variant_map[args.hf_model_id]
+    variant, version = get_variant_version(args.hf_model_id)
     # Tuned model is present only for `fp16` precision.
     is_tuned = "tuned" if args.use_tuned else "untuned"
     is_base = "/base" if args.use_base_vae else ""
@@ -95,7 +99,7 @@ def get_vae():
 
 
 def get_clip():
-    variant, version = hf_model_variant_map[args.hf_model_id]
+    variant, version = get_variant_version(args.hf_model_id)
     bucket_key = f"{variant}/untuned"
     model_key = (
         f"{variant}/{version}/clip/fp32/length_{args.max_length}/untuned"
diff --git a/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py b/apps/stable_diffusion/src/pipelines/pipeline_shark_stable_diffusion_utils.py
@@ -185,10 +185,12 @@ def from_pretrained(
         width: int,
         use_base_vae: bool,
     ):
-        init_kwargs = None
         if import_mlir:
-            if ckpt_loc:
-                preprocessCKPT()
+            if ckpt_loc != "":
+                assert ckpt_loc.lower().endswith(
+                    (".ckpt", ".safetensors")
+                ), "checkpoint files supported can be any of [.ckpt, .safetensors] type"
+                ckpt_loc = preprocessCKPT()
             mlir_import = SharkifyStableDiffusionModel(
                 model_id,
                 ckpt_loc,
diff --git a/apps/stable_diffusion/src/utils/__init__.py b/apps/stable_diffusion/src/utils/__init__.py
@@ -9,8 +9,10 @@
     opt_flags,
     resource_path,
 )
+from apps.stable_diffusion.src.utils.sd_annotation import sd_model_annotation
 from apps.stable_diffusion.src.utils.stable_args import args
 from apps.stable_diffusion.src.utils.utils import (
+    get_vmfb_path_name,
     get_shark_model,
     compile_through_fx,
     set_iree_runtime_flags,
diff --git a/apps/stable_diffusion/src/utils/resources/opt_flags.json b/apps/stable_diffusion/src/utils/resources/opt_flags.json
@@ -1,95 +1,101 @@
-  {
-    "unet": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": []
-        },
-        "fp32": {
-          "default_compilation_flags": []
-        }
+{
+  "unet": {
+    "tuned": {
+      "fp16": {
+        "default_compilation_flags": []
       },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32"
-          ],
-          "specified_compilation_flags": {
-            "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
-            "default_device": ["--iree-flow-enable-conv-img2col-transform"]
-          }
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
-        }
+      "fp32": {
+        "default_compilation_flags": []
       }
     },
-    "vae": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=32",
-            "--iree-flow-enable-conv-img2col-transform"
-          ]
+    "untuned": {
+      "fp16": {
+        "default_compilation_flags": [
+          "--iree-flow-enable-padding-linalg-ops",
+          "--iree-flow-linalg-ops-padding-size=32"
+        ],
+        "specified_compilation_flags": {
+          "cuda": ["--iree-flow-enable-conv-nchw-to-nhwc-transform"],
+          "default_device": ["--iree-flow-enable-conv-img2col-transform"]
         }
       },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
+      "fp32": {
+        "default_compilation_flags": [
+          "--iree-flow-enable-conv-nchw-to-nhwc-transform",
+          "--iree-flow-enable-padding-linalg-ops",
+          "--iree-flow-linalg-ops-padding-size=16"
+        ]
+      }
+    }
+  },
+  "vae": {
+    "tuned": {
+      "fp16": {
+        "default_compilation_flags": [],
+        "specified_compilation_flags": {
+          "cuda": [],
+          "default_device": ["--iree-flow-enable-padding-linalg-ops",
+                             "--iree-flow-linalg-ops-padding-size=32",
+                             "--iree-flow-enable-conv-img2col-transform"]
+        }
+      },
+      "fp32": {
+        "default_compilation_flags": [],
+        "specified_compilation_flags": {
+          "cuda": [],
+          "default_device": [
             "--iree-flow-enable-padding-linalg-ops",
             "--iree-flow-linalg-ops-padding-size=32",
             "--iree-flow-enable-conv-img2col-transform"
           ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-enable-conv-nchw-to-nhwc-transform",
-            "--iree-flow-enable-padding-linalg-ops",
-            "--iree-flow-linalg-ops-padding-size=16"
-          ]
         }
       }
     },
-    "clip": {
-      "tuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
+    "untuned": {
+      "fp16": {
+        "default_compilation_flags": [
+          "--iree-flow-enable-padding-linalg-ops",
+          "--iree-flow-linalg-ops-padding-size=32",
+          "--iree-flow-enable-conv-img2col-transform"
+        ]
       },
-      "untuned": {
-        "fp16": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        },
-        "fp32": {
-          "default_compilation_flags": [
-            "--iree-flow-linalg-ops-padding-size=16",
-            "--iree-flow-enable-padding-linalg-ops"
-          ]
-        }
+      "fp32": {
+        "default_compilation_flags": [
+          "--iree-flow-enable-conv-nchw-to-nhwc-transform",
+          "--iree-flow-enable-padding-linalg-ops",
+          "--iree-flow-linalg-ops-padding-size=16"
+        ]
+      }
+    }
+  },
+  "clip": {
+    "tuned": {
+      "fp16": {
+        "default_compilation_flags": [
+          "--iree-flow-linalg-ops-padding-size=16",
+          "--iree-flow-enable-padding-linalg-ops"
+        ]
+      },
+      "fp32": {
+        "default_compilation_flags": [
+          "--iree-flow-linalg-ops-padding-size=16",
+          "--iree-flow-enable-padding-linalg-ops"
+        ]
+      }
+    },
+    "untuned": {
+      "fp16": {
+        "default_compilation_flags": [
+          "--iree-flow-linalg-ops-padding-size=16",
+          "--iree-flow-enable-padding-linalg-ops"
+        ]
+      },
+      "fp32": {
+        "default_compilation_flags": [
+          "--iree-flow-linalg-ops-padding-size=16",
+          "--iree-flow-enable-padding-linalg-ops"
+        ]
       }
     }
   }
+}
diff --git a/apps/stable_diffusion/src/utils/sd_annotation.py b/apps/stable_diffusion/src/utils/sd_annotation.py
diff --git a/apps/stable_diffusion/src/utils/stable_args.py b/apps/stable_diffusion/src/utils/stable_args.py
diff --git a/apps/stable_diffusion/src/utils/utils.py b/apps/stable_diffusion/src/utils/utils.py

Original file line number	Diff line number	Diff line change
`@@ -6,4 +6,6 @@`
`6`	`6`	`get_unet,`
`7`	`7`	`get_clip,`
`8`	`8`	`get_tokenizer,`
	`9`	`+ get_params,`
	`10`	`+ get_variant_version,`
`9`	`11`	`)`