[SW-193263] Switch HQT unit tests to run on INC

Tiefen-boop · Tiefen-boop · commit 7bf952189d97 · 2024-07-29T11:47:02.000+03:00
Modify the tests to point to the correct package in INC instead of HQT.
Add an __init__.py file to include the content needed by the test_layers tests.

Change-Id: If47acdfc9f7521a54a7f350a444711a7c2b3e5b2
diff --git a/test/3x/torch/algorithms/fp8_quant/fp8_tests.py b/test/3x/torch/algorithms/fp8_quant/fp8_tests.py
@@ -1,6 +1,6 @@
-import habana_frameworks.torch.core as htcore
-import habana_quantization_toolkit
 import torch
+import habana_frameworks.torch.core as htcore
+import neural_compressor.torch.algorithms.fp8_quant
 
 # This file is for small tests run for debug flow and accuracy. (Not for CI)
 
@@ -73,7 +73,7 @@ def forward(self, x, b):
 model.eval()
 model = model.to("hpu").to(torch.bfloat16)
 htcore.hpu_initialize()
-habana_quantization_toolkit.prep_model(model)  # fp8 additions
+neural_compressor.torch.algorithms.fp8_quant.prep_model(model)  # fp8 additions
 
 
 with torch.no_grad():
@@ -170,4 +170,4 @@ def forward(self, x, b):
     # 5) tensor([[232.]], device='hpu:0', dtype=torch.bfloat16)
 
     # fp8 additions
-    habana_quantization_toolkit.finish_measurements(model)
+    neural_compressor.torch.algorithms.fp8_quant.finish_measurements(model)
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_deepspeed.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_deepspeed.py
@@ -2,8 +2,8 @@
 
 import pytest
 import torch
-from habana_quantization_toolkit._quant_common.quant_config import ScaleMethod
-from habana_quantization_toolkit.tests import TestVector, run_accuracy_test
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.quant_config import ScaleMethod
+from ..tester import run_accuracy_test, TestVector
 
 
 class LinearBlock(torch.nn.Module):
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_functions/test_config_json.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_functions/test_config_json.py
@@ -1,9 +1,9 @@
 """Use this module as an example of how to write new unit tests for layers."""
-
-import habana_quantization_toolkit as hqt
+import os
 import torch
-from habana_quantization_toolkit._quant_common.helper_modules import Matmul
-from habana_quantization_toolkit._quant_common.quant_config import QuantMode
+import neural_compressor.torch.algorithms.fp8_quant as fp8_quant
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.quant_config import QuantMode
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.helper_modules import Matmul
 
 
 class Model(torch.nn.Module):
@@ -20,6 +20,7 @@ def test_config_json():
             QuantMode.MEASURE: "measure",
             QuantMode.QUANTIZE: "quant",
         }[mode]
-        config_path = f"llama_{name}"
-        hqt.prep_model(model, config_path=config_path)
-        hqt.finish_measurements(model)
+        config_path = os.path.join(os.environ.get("NEURAL_COMPRESSOR_FORK_ROOT"),
+                                   f"neural_compressor/torch/algorithms/fp8_quant/custom_config/llama_{name}.json")
+        fp8_quant.prep_model(model, config_path=config_path)
+        fp8_quant.finish_measurements(model)
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_functions/test_matmul_fp8.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_functions/test_matmul_fp8.py
@@ -1,11 +1,10 @@
 import itertools
 from typing import Iterable, Tuple
-
-import habana_frameworks.torch.utils.experimental as htexp
 import pytest
 import torch
-from habana_quantization_toolkit._core.fp_utils import FP8_143_SCALES
-from habana_quantization_toolkit._quant_common.helper_modules import matmul_fp8
+from neural_compressor.torch.algorithms.fp8_quant._core.fp_utils import FP8_143_SCALES
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.helper_modules import matmul_fp8
+import habana_frameworks.torch.utils.experimental as htexp
 
 
 def run_test_matmul_fp8(
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/__init__.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/__init__.py
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_conv2d.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_conv2d.py
@@ -2,8 +2,8 @@
 
 import pytest
 import torch
-from habana_quantization_toolkit._quant_common.quant_config import ScaleMethod
-from habana_quantization_toolkit.tests import TestVector, run_accuracy_test
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.quant_config import ScaleMethod
+from ...tester import run_accuracy_test, TestVector
 
 
 def get_test_vectors(*, dtype: torch.dtype, C_in: int, H: int, W: int) -> typing.Iterable[TestVector]:
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_linear.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_linear.py
@@ -2,8 +2,8 @@
 
 import pytest
 import torch
-from habana_quantization_toolkit._quant_common.quant_config import ScaleMethod
-from habana_quantization_toolkit.tests import TestVector, run_accuracy_test
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.quant_config import ScaleMethod
+from ...tester import run_accuracy_test, TestVector
 
 
 def get_test_vectors(*, dtype: torch.dtype, N: int, D_in: int) -> typing.Iterable[TestVector]:
diff --git a/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_matmul.py b/test/3x/torch/algorithms/fp8_quant/unit_tests/test_layers/test_matmul.py
@@ -2,8 +2,8 @@
 
 import pytest
 import torch
-from habana_quantization_toolkit._quant_common.quant_config import ScaleMethod
-from habana_quantization_toolkit.tests import TestVector, run_accuracy_test
+from neural_compressor.torch.algorithms.fp8_quant._quant_common.quant_config import ScaleMethod
+from ...tester import run_accuracy_test, TestVector
 
 
 def get_test_vectors(*, dtype: torch.dtype) -> typing.Iterable[TestVector]:
@@ -31,8 +31,8 @@ def get_test_vectors(*, dtype: torch.dtype) -> typing.Iterable[TestVector]:
 
 
 class Matmul(torch.nn.Module):
-    """This is a mimic of other implementations of `Matmul`.
-
+    """
+    This is a mimic of other implementations of `Matmul`.
     It is here to not create a dependency on optimum-habana (which is logically needed).
     It should not be used directly in user code.
     """