@@ -124,7 +124,7 @@ def get_weights(fn):
124
124
125
125
126
126
def quantize_q8_0 (tensor : torch .Tensor ) -> torch .CharTensor :
127
- # equivalent to ggml_quantize_q8_0 in ggml.c
127
+ # equivalent to ggml_quantize_q8_0 in ggml.c (modulo rounding away from zero)
128
128
assert tensor .shape [1 ] % GGML_QK8_0 == 0
129
129
tensor = tensor .reshape (- 1 , GGML_QK8_0 )
130
130
scale = tensor .abs ().max (dim = - 1 , keepdim = True ).values / ((1 << 7 ) - 1 )
@@ -135,7 +135,7 @@ def quantize_q8_0(tensor: torch.Tensor) -> torch.CharTensor:
135
135
136
136
137
137
def quantize_q4_0 (tensor : torch .Tensor ) -> torch .CharTensor :
138
- # equivalent to ggml_quantize_q4_0 in ggml.c
138
+ # equivalent to ggml_quantize_q4_0 in ggml.c (modulo rounding away from zero)
139
139
assert tensor .shape [1 ] % GGML_QK4_0 == 0
140
140
tensor = tensor .reshape (- 1 , GGML_QK4_0 )
141
141
abs_max_indices = tensor .abs ().max (dim = - 1 , keepdim = True ).indices
@@ -150,7 +150,7 @@ def quantize_q4_0(tensor: torch.Tensor) -> torch.CharTensor:
150
150
151
151
152
152
def quantize_q4_1 (tensor : torch .Tensor ) -> torch .CharTensor :
153
- # equivalent to ggml_quantize_q4_1 in ggml.c
153
+ # equivalent to ggml_quantize_q4_1 in ggml.c (modulo rounding away from zero)
154
154
assert tensor .shape [1 ] % GGML_QK4_1 == 0
155
155
tensor = tensor .reshape (- 1 , GGML_QK4_1 )
156
156
abs_max_indices = tensor .max (dim = - 1 , keepdim = True ).indices
@@ -170,13 +170,14 @@ def quantize_q4_1(tensor: torch.Tensor) -> torch.CharTensor:
170
170
171
171
def maybe_quantize_tensor (tensor , ggml_type ):
172
172
assert tensor .dtype == torch .float32
173
-
174
173
if ggml_type == gguf .GGMLQuantizationType .F32 :
175
174
return tensor .float ()
176
175
elif ggml_type == gguf .GGMLQuantizationType .F16 :
177
176
return tensor .half ()
178
177
elif ggml_type == gguf .GGMLQuantizationType .Q8_0 :
179
- return quantize_q8_0 (tensor )
178
+ if tensor .device .type == "meta" :
179
+ return quantize_q8_0 (tensor ) # Cannot convert into numpy array.
180
+ return torch .from_numpy (gguf .quantize_q8_0 (tensor .numpy ()))
180
181
elif ggml_type == gguf .GGMLQuantizationType .Q4_0 :
181
182
return quantize_q4_0 (tensor )
182
183
elif ggml_type == gguf .GGMLQuantizationType .Q4_1 :
0 commit comments