Skip to content

Commit 32e1ba8

Browse files
author
Prashant Kumar
committed
Add batch_size support for stable diffusion.
1 parent 1939376 commit 32e1ba8

File tree

4 files changed

+39
-14
lines changed

4 files changed

+39
-14
lines changed

shark/examples/shark_inference/stable_diffusion/main.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,22 @@
2424
TUNED_GCLOUD_BUCKET = "gs://shark_tank/quinn"
2525
UNET_FP16_TUNED = "unet_fp16_tuned"
2626

27+
BATCH_SIZE = len(args.prompts)
28+
29+
if BATCH_SIZE not in [1, 2]:
30+
import sys
31+
32+
sys.exit("Only batch size 1 and 2 are supported.")
33+
34+
if BATCH_SIZE > 1 and args.precision != "fp16":
35+
sys.exit("batch size > 1 is supported for fp16 model.")
36+
37+
38+
if BATCH_SIZE != 1:
39+
TUNED_GCLOUD_BUCKET = "gs://shark_tank/prashant_nod"
40+
UNET_FP16_TUNED = f"unet_fp16_{BATCH_SIZE}"
41+
VAE_FP16 = f"vae_fp16_{BATCH_SIZE}"
42+
2743
# Helper function to profile the vulkan device.
2844
def start_profiling(file_path="foo.rdc", profiling_mode="queue"):
2945
if args.vulkan_debug_utils and "vulkan" in args.device:
@@ -67,6 +83,10 @@ def get_models():
6783
vae_args = IREE_EXTRA_ARGS
6884
unet_name = UNET_FP16
6985
vae_name = VAE_FP16
86+
87+
if batch_size > 1:
88+
vae_args = []
89+
7090
if args.import_mlir == True:
7191
return get_vae16(model_name=VAE_FP16), get_unet16_wrapped(
7292
model_name=UNET_FP16
@@ -112,8 +132,7 @@ def get_models():
112132
f"-iree-vulkan-target-triple={args.iree_vulkan_target_triple}"
113133
)
114134

115-
prompt = [args.prompt]
116-
135+
prompt = args.prompts
117136
height = 512 # default height of Stable Diffusion
118137
width = 512 # default width of Stable Diffusion
119138

@@ -211,4 +230,5 @@ def get_models():
211230
print("Total image generation runtime (s): {}".format(time.time() - start))
212231

213232
pil_images = [Image.fromarray(image) for image in images]
214-
pil_images[0].save(f"{args.prompt}.jpg")
233+
for i in range(batch_size):
234+
pil_images[i].save(f"{args.prompts[i]}_{i}.jpg")

shark/examples/shark_inference/stable_diffusion/model_wrappers.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66
YOUR_TOKEN = "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"
77

88

9+
BATCH_SIZE = len(args.prompts)
10+
11+
912
def get_vae32(model_name="vae_fp32"):
1013
class VaeModel(torch.nn.Module):
1114
def __init__(self):
@@ -21,7 +24,7 @@ def forward(self, input):
2124
return (x / 2 + 0.5).clamp(0, 1)
2225

2326
vae = VaeModel()
24-
vae_input = torch.rand(1, 4, 64, 64)
27+
vae_input = torch.rand(BATCH_SIZE, 4, 64, 64)
2528
shark_vae = compile_through_fx(
2629
vae,
2730
(vae_input,),
@@ -47,7 +50,7 @@ def forward(self, input):
4750

4851
vae = VaeModel()
4952
vae = vae.half().cuda()
50-
vae_input = torch.rand(1, 4, 64, 64, dtype=torch.half).cuda()
53+
vae_input = torch.rand(BATCH_SIZE, 4, 64, 64, dtype=torch.half).cuda()
5154
shark_vae = compile_through_fx(
5255
vae,
5356
(vae_input,),
@@ -143,8 +146,10 @@ def forward(self, latent, timestep, text_embedding, sigma):
143146

144147
unet = UnetModel()
145148
unet = unet.half().cuda()
146-
latent_model_input = torch.rand([1, 4, 64, 64]).half().cuda()
147-
text_embeddings = torch.rand([2, args.max_length, 768]).half().cuda()
149+
latent_model_input = torch.rand([BATCH_SIZE, 4, 64, 64]).half().cuda()
150+
text_embeddings = (
151+
torch.rand([2 * BATCH_SIZE, args.max_length, 768]).half().cuda()
152+
)
148153
sigma = torch.tensor(1).to(torch.float32)
149154
shark_unet = compile_through_fx(
150155
unet,
@@ -185,8 +190,8 @@ def forward(self, latent, timestep, text_embedding, sigma):
185190
return noise_pred
186191

187192
unet = UnetModel()
188-
latent_model_input = torch.rand([1, 4, 64, 64])
189-
text_embeddings = torch.rand([2, args.max_length, 768])
193+
latent_model_input = torch.rand([BATCH_SIZE, 4, 64, 64])
194+
text_embeddings = torch.rand([2 * BATCH_SIZE, args.max_length, 768])
190195
sigma = torch.tensor(1).to(torch.float32)
191196
shark_unet = compile_through_fx(
192197
unet,

shark/examples/shark_inference/stable_diffusion/stable_args.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
)
66

77
p.add_argument(
8-
"--prompt",
9-
type=str,
10-
default="a photograph of an astronaut riding a horse",
11-
help="the text to generate image of.",
8+
"--prompts",
9+
nargs="+",
10+
default=["a photograph of an astronaut riding a horse"],
11+
help="text of which images to be generated.",
1212
)
1313
p.add_argument(
1414
"--device", type=str, default="cpu", help="device to run the model."

shark/examples/shark_inference/stable_diffusion/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def strip_overloads(gm):
9191
frontend="torch",
9292
)
9393

94-
mlir_module, func_name = mlir_importer.import_mlir()
94+
(mlir_module, func_name), _, _ = mlir_importer.import_debug()
9595

9696
shark_module = SharkInference(
9797
mlir_module,

0 commit comments

Comments (0)