
Commit 16a7b11

Get default config based on the auto-detect CPU type (#1904)
Signed-off-by: yiliu30 <[email protected]>
1 parent 2fc7255 commit 16a7b11

6 files changed: +291 −37 lines changed

neural_compressor/common/utils/constants.py

Lines changed: 3 additions & 0 deletions
@@ -56,3 +56,6 @@ class Mode(Enum):
     PREPARE = "prepare"
     CONVERT = "convert"
     QUANTIZE = "quantize"
+
+
+SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
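The keyword list added here feeds the brand-name check in the new detect_processor_type_based_on_hw helper (see the utility.py diff below). A minimal sketch of that check, using a hypothetical brand string that is not part of this commit:

    SERVER_PROCESSOR_BRAND_KEY_WORLD_LST = ["Xeon"]
    brand_raw = "Intel(R) Xeon(R) Platinum 8480+"  # hypothetical example value, not from this commit
    # Same membership test as detect_processor_type_based_on_hw uses; True here because "Xeon" appears.
    is_server_brand = any(keyword in brand_raw for keyword in SERVER_PROCESSOR_BRAND_KEY_WORLD_LST)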

neural_compressor/common/utils/utility.py

Lines changed: 130 additions & 2 deletions
@@ -17,6 +17,7 @@
 """The utility of common module."""
 
 import collections
+import enum
 import importlib
 import subprocess
 import time
@@ -26,7 +27,7 @@
 import psutil
 from prettytable import PrettyTable
 
-from neural_compressor.common.utils import Mode, TuningLogger, logger
+from neural_compressor.common.utils import Mode, TuningLogger, constants, logger
 
 __all__ = [
     "set_workspace",
@@ -41,6 +42,9 @@
     "CpuInfo",
     "default_tuning_logger",
     "call_counter",
+    "cpu_info",
+    "ProcessorType",
+    "detect_processor_type_based_on_hw",
     "Statistics",
 ]
 
@@ -92,7 +96,7 @@ def __call__(self, *args, **kwargs):
 
 @singleton
 class CpuInfo(object):
-    """CPU info collection."""
+    """Get CPU Info."""
 
     def __init__(self):
         """Get whether the cpu numerical format is bf16, the number of sockets, cores and cores per socket."""
@@ -113,6 +117,39 @@ def __init__(self):
             b"\xB8\x07\x00\x00\x00" b"\x0f\xa2" b"\xC3",  # mov eax, 7 # cpuid # ret
         )
         self._bf16 = bool(eax & (1 << 5))
+        self._info = info
+        self._brand_raw = info.get("brand_raw", "")
+        # detect the below info when needed
+        self._cores = None
+        self._sockets = None
+        self._cores_per_socket = None
+
+    @property
+    def brand_raw(self):
+        """Get the brand name of the CPU."""
+        return self._brand_raw
+
+    @brand_raw.setter
+    def brand_raw(self, brand_name):
+        """Set the brand name of the CPU."""
+        self._brand_raw = brand_name
+
+    @staticmethod
+    def _detect_cores():
+        physical_cores = psutil.cpu_count(logical=False)
+        return physical_cores
+
+    @property
+    def cores(self):
+        """Get the number of cores in platform."""
+        if self._cores is None:
+            self._cores = self._detect_cores()
+        return self._cores
+
+    @cores.setter
+    def cores(self, num_of_cores):
+        """Set the number of cores in platform."""
+        self._cores = num_of_cores
 
     @property
     def bf16(self):
@@ -124,6 +161,60 @@ def vnni(self):
         """Get whether it is vnni."""
         return self._vnni
 
+    @property
+    def cores_per_socket(self) -> int:
+        """Get the cores per socket."""
+        if self._cores_per_socket is None:
+            self._cores_per_socket = self.cores // self.sockets
+        return self._cores_per_socket
+
+    @property
+    def sockets(self):
+        """Get the number of sockets in platform."""
+        if self._sockets is None:
+            self._sockets = self._get_number_of_sockets()
+        return self._sockets
+
+    @sockets.setter
+    def sockets(self, num_of_sockets):
+        """Set the number of sockets in platform."""
+        self._sockets = num_of_sockets
+
+    def _get_number_of_sockets(self) -> int:
+        if "arch" in self._info and "ARM" in self._info["arch"]:  # pragma: no cover
+            return 1
+
+        num_sockets = None
+        cmd = "cat /proc/cpuinfo | grep 'physical id' | sort -u | wc -l"
+        if psutil.WINDOWS:
+            cmd = r'wmic cpu get DeviceID | C:\Windows\System32\find.exe /C "CPU"'
+        elif psutil.MACOS:  # pragma: no cover
+            cmd = "sysctl -n machdep.cpu.core_count"
+
+        num_sockets = None
+        try:
+            with subprocess.Popen(
+                args=cmd,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                universal_newlines=False,
+            ) as proc:
+                proc.wait()
+                if proc.stdout:
+                    for line in proc.stdout:
+                        num_sockets = int(line.decode("utf-8", errors="ignore").strip())
+        except Exception as e:
+            logger.error("Failed to get number of sockets: %s" % e)
+        if isinstance(num_sockets, int) and num_sockets >= 1:
+            return num_sockets
+        else:
+            logger.warning("Failed to get number of sockets, return 1 as default.")
+            return 1
+
+
+cpu_info = CpuInfo()
+
 
 def dump_elapsed_time(customized_msg=""):
     """Get the elapsed time for decorated functions.
@@ -236,6 +327,43 @@ def wrapper(*args, **kwargs):
     return wrapper
 
 
+class ProcessorType(enum.Enum):
+    Client = "Client"
+    Server = "Server"
+
+
+def detect_processor_type_based_on_hw():
+    """Detects the processor type based on the hardware configuration.
+
+    Returns:
+        ProcessorType: The detected processor type (Server or Client).
+    """
+    # Detect the processor type based on below conditions:
+    # If there are more than one sockets, it is a server.
+    # If the brand name includes key word in `SERVER_PROCESSOR_BRAND_KEY_WORLD_LST`, it is a server.
+    # If the memory size is greater than 32GB, it is a server.
+    log_mgs = "Processor type detected as {processor_type} due to {reason}."
+    if cpu_info.sockets > 1:
+        logger.info(log_mgs.format(processor_type=ProcessorType.Server.value, reason="there are more than one sockets"))
+        return ProcessorType.Server
+    elif any(brand in cpu_info.brand_raw for brand in constants.SERVER_PROCESSOR_BRAND_KEY_WORLD_LST):
+        logger.info(
+            log_mgs.format(processor_type=ProcessorType.Server.value, reason=f"the brand name is {cpu_info.brand_raw}.")
+        )
+        return ProcessorType.Server
+    elif psutil.virtual_memory().total / (1024**3) > 32:
+        logger.info(
+            log_mgs.format(processor_type=ProcessorType.Server.value, reason="the memory size is greater than 32GB")
+        )
+        return ProcessorType.Server
+    else:
+        logger.info(
+            "Processor type detected as %s, pass `processor_type='server'` to override it if needed.",
+            ProcessorType.Client.value,
+        )
+        return ProcessorType.Client
+
+
 class Statistics:
     """The statistics printer."""
 
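The cpu_info singleton and detect_processor_type_based_on_hw are added to __all__ above and are imported from neural_compressor.common.utils by the torch-side diff below, so a minimal usage sketch (assuming that package-level re-export) looks like:

    from neural_compressor.common.utils import cpu_info, detect_processor_type_based_on_hw

    # Socket count is detected lazily via a per-OS shell command and falls back to 1 on failure.
    print(cpu_info.sockets)
    # Brand string from cpuid, e.g. one containing "Xeon" on server parts; a setter allows overriding it.
    print(cpu_info.brand_raw)
    # Returns ProcessorType.Server or ProcessorType.Client based on sockets, brand keywords, and total memory.
    print(detect_processor_type_based_on_hw())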

neural_compressor/torch/quantization/config.py

Lines changed: 30 additions & 18 deletions
@@ -23,6 +23,7 @@
 
 import torch
 
+import neural_compressor.torch.utils as torch_utils
 from neural_compressor.common.base_config import (
     BaseConfig,
     config_registry,
@@ -219,14 +220,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]
             dtype=["int4", "nf4"], use_sym=[True, False], group_size=[32, 128], use_mse_search=[False, True]
         )
 
+    @classmethod
+    def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "RTNConfig"]:
+        pre_defined_configs: Dict[torch_utils.ProcessorType, RTNConfig] = {}
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
+        return pre_defined_configs
 
-def get_default_rtn_config() -> RTNConfig:
-    """Generate the default rtn config.
 
-    Returns:
-        the default rtn config.
-    """
-    return RTNConfig()
+def get_default_rtn_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
+    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
+    return RTNConfig.get_predefined_configs()[process_type]
 
 
 def get_default_double_quant_config(type="BNB_NF4"):
@@ -378,14 +382,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig
         # TODO fwk owner needs to update it.
         return GPTQConfig(act_order=[True, False], use_sym=[False, True])
 
+    @classmethod
+    def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "GPTQConfig"]:
+        pre_defined_configs: Dict[torch_utils.ProcessorType, GPTQConfig] = {}
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
+        return pre_defined_configs
 
-def get_default_gptq_config() -> GPTQConfig:
-    """Generate the default gptq config.
 
-    Returns:
-        the default gptq config.
-    """
-    return GPTQConfig()
+def get_default_gptq_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
+    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
+    return GPTQConfig.get_predefined_configs()[process_type]
 
 
 ######################## AWQ Config ###############################
@@ -725,6 +732,7 @@ def __init__(
         not_use_best_mse: bool = False,
         dynamic_max_gap: int = -1,
         scale_dtype: str = "fp16",
+        use_layer_wise: bool = False,
         white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,
     ):
         """Init AUTOROUND weight-only quantization config.
@@ -777,6 +785,7 @@ def __init__(
         self.not_use_best_mse = not_use_best_mse
         self.dynamic_max_gap = dynamic_max_gap
         self.scale_dtype = scale_dtype
+        self.use_layer_wise = use_layer_wise
        self._post_init()
 
     @classmethod
@@ -803,14 +812,17 @@ def get_config_set_for_tuning(cls) -> Union[None, "AutoRoundConfig", List["AutoR
         # TODO fwk owner needs to update it.
         return AutoRoundConfig(bits=[4, 6])
 
+    @classmethod
+    def get_predefined_configs(cls) -> Dict[torch_utils.ProcessorType, "AutoRoundConfig"]:
+        pre_defined_configs: Dict[torch_utils.ProcessorType, AutoRoundConfig] = {}
+        pre_defined_configs[torch_utils.ProcessorType.Client] = cls(use_layer_wise=True)
+        pre_defined_configs[torch_utils.ProcessorType.Server] = cls()
+        return pre_defined_configs
 
-def get_default_AutoRound_config() -> AutoRoundConfig:
-    """Generate the default AUTOROUND config.
 
-    Returns:
-        the default AUTOROUND config.
-    """
-    return AutoRoundConfig()
+def get_default_AutoRound_config(processor_type: Optional[Union[str, torch_utils.ProcessorType]] = None) -> RTNConfig:
+    process_type = torch_utils.get_processor_type_from_user_config(processor_type)
+    return AutoRoundConfig.get_predefined_configs()[process_type]
 
 
 ######################## MX Config ###############################
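With this change the get_default_*_config entry points return a processor-specific preset instead of a bare config: the Client preset enables use_layer_wise, the Server preset keeps the defaults. A sketch of the intended call patterns, assuming get_default_rtn_config is re-exported from neural_compressor.torch.quantization (that import path is not part of this diff):

    from neural_compressor.torch.quantization import get_default_rtn_config

    auto_cfg = get_default_rtn_config()                           # auto-detects client vs. server hardware
    client_cfg = get_default_rtn_config(processor_type="client")  # client preset: RTNConfig(use_layer_wise=True)
    server_cfg = get_default_rtn_config(processor_type="server")  # server preset: plain RTNConfig()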

neural_compressor/torch/utils/utility.py

Lines changed: 39 additions & 2 deletions
@@ -13,12 +13,21 @@
 # limitations under the License.
 
 
-from typing import Callable, Dict, List, Tuple, Union
+import enum
+from typing import Callable, Dict, List, Optional, Tuple, Union
 
+import psutil
 import torch
 from typing_extensions import TypeAlias
 
-from neural_compressor.common.utils import Mode, Statistics, logger
+from neural_compressor.common.utils import (
+    Mode,
+    ProcessorType,
+    Statistics,
+    cpu_info,
+    detect_processor_type_based_on_hw,
+    logger,
+)
 
 OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]
 
@@ -235,3 +244,31 @@ def get_model_device(model: torch.nn.Module):
     """
     for n, p in model.named_parameters():
         return p.data.device.type  # p.data.device == device(type='cpu')
+
+
+def get_processor_type_from_user_config(user_processor_type: Optional[Union[str, ProcessorType]] = None):
+    """Get the processor type.
+
+    Get the processor type based on the user configuration or automatically detect it based on the hardware.
+
+    Args:
+        user_processor_type (Optional[Union[str, ProcessorType]]): The user-specified processor type. Defaults to None.
+
+    Returns:
+        ProcessorType: The detected or user-specified processor type.
+
+    Raises:
+        AssertionError: If the user-specified processor type is not supported.
+        NotImplementedError: If the processor type is not recognized.
+    """
+    if user_processor_type is None:
+        processor_type = detect_processor_type_based_on_hw()
+    elif isinstance(user_processor_type, ProcessorType):
+        processor_type = user_processor_type
+    elif isinstance(user_processor_type, str):
+        user_processor_type = user_processor_type.lower().capitalize()
+        assert user_processor_type in ProcessorType.__members__, f"Unsupported processor type: {user_processor_type}"
+        processor_type = ProcessorType(user_processor_type)
+    else:
+        raise NotImplementedError(f"Unsupported processor type: {user_processor_type}")
+    return processor_type
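The string branch lowercases and then capitalizes the user value before validating it against the enum, so "client", "CLIENT", and "Client" all resolve to the same member. A minimal sketch, accessing the helper through neural_compressor.torch.utils the same way config.py does above:

    from neural_compressor.torch.utils import ProcessorType, get_processor_type_from_user_config

    assert get_processor_type_from_user_config("client") is ProcessorType.Client
    assert get_processor_type_from_user_config(ProcessorType.Server) is ProcessorType.Server
    # With None, the hardware heuristics decide: more than one socket, a "Xeon" brand string,
    # or more than 32GB of RAM all map to ProcessorType.Server; otherwise ProcessorType.Client.
    detected = get_processor_type_from_user_config(None)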
