Commit 885b096

Author: Prashant Kumar

[WEB] Cache the compiled module.
-- Don't compile the module again and again.

1 parent: a886cba

2 files changed, 33 additions and 19 deletions


web/models/albert_maskfill.py

Lines changed: 19 additions & 13 deletions
@@ -6,6 +6,7 @@
 
 MAX_SEQUENCE_LENGTH = 512
 BATCH_SIZE = 1
+COMPILE_MODULE = None
 
 
 class AlbertModule(torch.nn.Module):
@@ -54,18 +55,23 @@ def top5_possibilities(text, inputs, token_logits):
 
 
 def albert_maskfill_inf(masked_text):
+    global COMPILE_MODULE
     inputs = preprocess_data(masked_text)
-    mlir_importer = SharkImporter(
-        AlbertModule(),
-        inputs,
-        frontend="torch",
-    )
-    minilm_mlir, func_name = mlir_importer.import_mlir(
-        is_dynamic=False, tracing_required=True
-    )
-    shark_module = SharkInference(
-        minilm_mlir, func_name, mlir_dialect="linalg"
-    )
-    shark_module.compile()
-    token_logits = torch.tensor(shark_module.forward(inputs))
+    if COMPILE_MODULE == None:
+        print("module compiled")
+        mlir_importer = SharkImporter(
+            AlbertModule(),
+            inputs,
+            frontend="torch",
+        )
+        minilm_mlir, func_name = mlir_importer.import_mlir(
+            is_dynamic=False, tracing_required=True
+        )
+        shark_module = SharkInference(
+            minilm_mlir, func_name, mlir_dialect="linalg", device="intel-gpu"
+        )
+        shark_module.compile()
+        COMPILE_MODULE = shark_module
+
+    token_logits = torch.tensor(COMPILE_MODULE.forward(inputs))
     return top5_possibilities(masked_text, inputs, token_logits)
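
For context, the caching introduced above can also be read as a small self-contained helper. This is a minimal sketch, not part of the commit: it assumes the SharkImporter / SharkInference calls and imports already present in web/models/albert_maskfill.py, the _compiled_albert helper name is hypothetical, and it uses the more idiomatic `is None` check in place of `== None`.

    COMPILE_MODULE = None  # module-level cache, as introduced by this commit


    def _compiled_albert(inputs):
        # Hypothetical helper: build and compile the module on the first call,
        # then return the cached SharkInference object on every later call.
        global COMPILE_MODULE
        if COMPILE_MODULE is None:
            mlir_importer = SharkImporter(
                AlbertModule(),
                inputs,
                frontend="torch",
            )
            minilm_mlir, func_name = mlir_importer.import_mlir(
                is_dynamic=False, tracing_required=True
            )
            shark_module = SharkInference(
                minilm_mlir, func_name, mlir_dialect="linalg", device="intel-gpu"
            )
            shark_module.compile()
            COMPILE_MODULE = shark_module
        return COMPILE_MODULE


    def albert_maskfill_inf(masked_text):
        inputs = preprocess_data(masked_text)
        token_logits = torch.tensor(_compiled_albert(inputs).forward(inputs))
        return top5_possibilities(masked_text, inputs, token_logits)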

web/models/resnet50.py

Lines changed: 14 additions & 6 deletions
@@ -7,6 +7,8 @@
 
 ################################## Preprocessing inputs and model ############
 
+COMPILE_MODULE = None
+
 
 def preprocess_image(img):
     image = Image.fromarray(img)
@@ -49,13 +51,19 @@ def top3_possibilities(res):
 def resnet_inf(numpy_img):
     img = preprocess_image(numpy_img)
     ## Can pass any img or input to the forward module.
-    mlir_model, func_name, inputs, golden_out = download_torch_model(
-        "resnet50"
-    )
+    global COMPILE_MODULE
+    if COMPILE_MODULE == None:
+        mlir_model, func_name, inputs, golden_out = download_torch_model(
+            "resnet50"
+        )
+
+        shark_module = SharkInference(
+            mlir_model, func_name, device="intel-gpu", mlir_dialect="linalg"
+        )
+        shark_module.compile()
+        COMPILE_MODULE = shark_module
 
-    shark_module = SharkInference(mlir_model, func_name, mlir_dialect="linalg")
-    shark_module.compile()
-    result = shark_module.forward((img.detach().numpy(),))
+    result = COMPILE_MODULE.forward((img.detach().numpy(),))
 
     # print("The top 3 results obtained via shark_runner is:")
     return top3_possibilities(torch.from_numpy(result))
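
To show the effect of the cache at call time, here is a hypothetical usage sketch; the numpy import, the module import path, and the image shape/dtype are assumptions for illustration, not part of the commit.

    import numpy as np

    from web.models.resnet50 import resnet_inf  # import path assumed from the file tree

    # Any HWC uint8 image works, since preprocess_image calls Image.fromarray(img);
    # the 224x224x3 shape here is only an illustrative choice.
    img = np.random.randint(0, 255, size=(224, 224, 3), dtype=np.uint8)

    top3_first = resnet_inf(img)   # first call: downloads, imports, and compiles the model
    top3_again = resnet_inf(img)   # later calls reuse COMPILE_MODULE, no recompilation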
