@@ -21,11 +21,6 @@
 
 aten = torch.ops.aten
 
-try:
-    import torch_npu
-except ImportError:
-    torch_npu = None
-
 
 class Int4PlainInt32TensorNPU(TorchAOBaseTensor):
     """
@@ -93,9 +88,6 @@ def from_hp(
         w: torch.Tensor,
         block_size: List[int],
     ):
-        if torch_npu is None:
-            raise ImportError("Requires torch_npu but it is not installed")
-
         assert w.ndim == 2 and w.device.type == "npu", (
            f"Expecting 2D tensor on NPU, but got: {w.shape} on {w.device.type}"
        )
@@ -143,7 +135,7 @@ def from_hp(
            f"torch_npu.npu_convert_weight_to_int4pack expects last dim must be aligned to 8,but got {int_data.shape[-1]}"
        )
 
-        packed_weight = torch_npu.npu_convert_weight_to_int4pack(
+        packed_weight = torch.ops.npu.npu_convert_weight_to_int4pack(
            int_data.contiguous(), 0
        )
 
@@ -174,9 +166,6 @@ def _(func, types, args, kwargs):
        args[2] if len(args) > 2 else None,
    )
 
-    if torch_npu is None:
-        raise ImportError("Requires torch_npu but it is not installed")
-
    assert input_tensor.device.type == "npu", (
        f"For NPU device only but got: {input_tensor.device.type}"
    )
@@ -219,7 +208,7 @@ def _(func, types, args, kwargs):
    # groupwise int4 quantization
    groupsize = weight_tensor.block_size[1]
 
-    y = torch_npu.npu_weight_quant_batchmatmul(
+    y = torch.ops.npu.npu_weight_quant_batchmatmul(
        x=act_mat,
        weight=packed_weight.contiguous().transpose(-1, -2),
        antiquant_scale=scale,
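For reference, a minimal sketch of the call-time op lookup this diff switches to. The helper name and the availability check below are illustrative assumptions, not part of the diff: the NPU custom op is resolved through torch.ops only when it is actually needed, so the module imports cleanly without torch_npu, while the call itself still requires torch_npu to have been imported and to have registered its operators.

import torch

def _convert_weight_to_int4pack(int_data: torch.Tensor) -> torch.Tensor:
    # Resolve the Ascend custom op lazily through the dispatcher; it only
    # exists once torch_npu has been imported and registered its ops.
    try:
        convert = torch.ops.npu.npu_convert_weight_to_int4pack
    except (AttributeError, RuntimeError) as err:
        raise ImportError(
            "torch.ops.npu.npu_convert_weight_to_int4pack is not registered; "
            "install and import torch_npu first"
        ) from err
    # Mirrors the call in the diff; the trailing 0 is passed through unchanged.
    return convert(int_data.contiguous(), 0)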