[FIX] Gradlib OOM on Navi and sometimes on MI (#124)

maleksan85 · maleksan85 · root · commit d45dfeaa55ad · 2024-08-12T23:14:30.000Z
* add memory cleanup after every shape, and add a parameter (the CACHE_INVALIDATE_BUFFERS environment variable) to reduce the number of cache invalidation buffers

* small typo

* syntax change

---------

Co-authored-by: maleksan85 <maleksan@amd.com>
diff --git a/gradlib/gradlib/GemmTuner.py b/gradlib/gradlib/GemmTuner.py
@@ -1,3 +1,4 @@
+import os
 import random
 from pathlib import Path
 
@@ -13,6 +14,8 @@
 rtol = 1e-5
 atol = 1
 
+CACHE_INVALIDATE_BUFFERS = int(os.getenv("CACHE_INVALIDATE_BUFFERS", "37"))
+
 
 class Gemm:
 
@@ -24,7 +27,7 @@ def __init__(self, m, n, k, indtype, outdtype, rocblas_decode=False):
         self.outdtype = outdtype
         self.use_rocblas = (indtype == outdtype
                             and indtype is not torch.float8_e4m3fnuz)
-        self.nb = 37
+        self.nb = CACHE_INVALIDATE_BUFFERS
         self.inp = torch.randn((self.n, self.k),
                                device='cuda').to(self.indtype)
         self.weights = torch.randn((self.m, self.k),
@@ -283,6 +286,9 @@ def find_best_sols(self):
             soldf.loc[i, 'libtype'] = gemmobj.best_libtype
             soldf.loc[i, 'solidx'] = gemmobj.best_solidx
             soldf.loc[i, 'soltimems'] = gemmobj.best_soltime
+            del gemmobj
+            torch.cuda.empty_cache()
+
         soldf['indtype'] = self.indtype
         soldf['outdtype'] = self.outdtype
         finaldf = pd.concat([self.gemm_problems, soldf], axis=1)