Skip to content

Commit 21b1759

Browse files
authored
More log fixes (#1328)
* due to how the pb loops, call pb.iter() to get the current iter step
* rename
* fix pb usage

---------

Signed-off-by: Qubitium <[email protected]>
1 parent 2ff2ddf commit 21b1759

File tree

3 files changed

+18
-15
lines changed

3 files changed

+18
-15
lines changed

gptqmodel/utils/model.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,7 @@ def pack_module(name, qModules, quant_result, layers, pbar=None):
516516
qModules[name].pack(linear=layers[name], scales=scale, zeros=zero, g_idx=g_idx)
517517
qModules[name].to(layer_device)
518518
if pbar:
519-
pbar.iter()
519+
pbar.next()
520520
pbar.progress()
521521

522522

gptqmodel/utils/progress.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ def __init__(self,
6363
self.bar_length = bar_length
6464
self.fill = fill
6565
self.info_text = info
66-
self.current_iteration = 0
66+
self.current_iter_step = 0
6767
self.time = time.time()
6868

6969
def info(self, info:str):
@@ -74,7 +74,7 @@ def info(self, info:str):
7474

7575
def progress(self, iteration:int = None):
7676
if not iteration:
77-
iteration = self.current_iteration
77+
iteration = self.current_iter_step
7878

7979
columns, _ = terminal_size()
8080
bar_length = columns
@@ -98,7 +98,7 @@ def progress(self, iteration:int = None):
9898

9999
filled_length = int(bar_length * iteration // len(self))
100100
bar = self.fill * filled_length + '-' * (bar_length - filled_length)
101-
self.log(bar=bar, log=log, padding=padding, end='\n' if percent_num >= 1.0 else '')
101+
self.log(bar=bar, log=log, padding=padding, end='') # '\n' if percent_num >= 1.0 else ''
102102

103103
def calc_time(self, iteration):
104104
used_time = int(time.time() - self.time)
@@ -169,18 +169,21 @@ def _comparable(self):
169169
def __hash__(self):
170170
return id(self)
171171

172-
def iter(self):
173-
self.current_iteration += 1
172+
def step(self) -> int:
173+
return self.current_iter_step
174+
175+
def next(self):
176+
self.current_iter_step += 1
174177

175178
def __iter__(self):
176179
iterable = self.iterable
177180

178181
for obj in iterable:
179-
self.iter()
182+
self.next()
180183
self.progress()
181184
yield obj
182185

183-
self.progress()
186+
# self.progress()
184187
self.close()
185188
return
186189

tests/inference_speed.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
class InferenceSpeed(unittest.TestCase):
3333
NATIVE_MODEL_ID = "/monster/data/model/DeepSeek-R1-Distill-Qwen-7B-gptqmodel-4bit-vortex-v2"
3434
BITBLAS_NATIVE_MODEL_ID = "/monster/data/model/opt-125M-autoround-lm_head-false-symTrue"
35-
MAX_NEW_TOEKNS = 10
35+
MAX_NEW_TOKENS = 10
3636
NUM_RUNS = 20
3737
PROMPTS = [
3838
"I am in Paris and I",
@@ -69,11 +69,11 @@ def inference(self, model_path, backend, tokens_per_second, assert_result=True,
6969
# compile kernels need JIT compile (Bitblas, IPEX, Triton) so we should do some warmup before actual speed run
7070
if warmup_runs > 0:
7171
pb = ProgressBar(range(warmup_runs))
72-
for i in pb:
73-
pb.info(f"warmup run index {i} of {warmup_runs - 1}")
72+
for _ in pb:
73+
pb.info(f"warmup run index {pb.step()} of {len(pb)}")
7474
pb.progress()
7575
start_time = time.time()
76-
result = model.generate(**inp, max_new_tokens=self.MAX_NEW_TOEKNS, pad_token_id=tokenizer.pad_token_id)
76+
result = model.generate(**inp, max_new_tokens=self.MAX_NEW_TOKENS, pad_token_id=tokenizer.pad_token_id)
7777
end_time = time.time()
7878
elapsed_time = end_time - start_time
7979
times.append(elapsed_time)
@@ -97,10 +97,10 @@ def inference(self, model_path, backend, tokens_per_second, assert_result=True,
9797
print(f"**************** {backend} Warm-up Result Info End****************")
9898

9999
pb = ProgressBar(range(self.NUM_RUNS))
100-
for i in pb:
101-
pb.info(f"run index {i} of {self.NUM_RUNS - 1}")
100+
for _ in pb:
101+
pb.info(f"run index {pb.step()} of {len(pb)}")
102102
start_time = time.time()
103-
result = model.generate(**inp, max_new_tokens=self.MAX_NEW_TOEKNS, pad_token_id=tokenizer.pad_token_id)
103+
result = model.generate(**inp, max_new_tokens=self.MAX_NEW_TOKENS, pad_token_id=tokenizer.pad_token_id)
104104
end_time = time.time()
105105
elapsed_time = end_time - start_time
106106
times.append(elapsed_time)

0 commit comments

Comments (0)