Commit 88a263a

Spelling fixes (#662)
* Spelling fixes for inpt_tensor to input_tensor
* inpt_tensor -> input_tensor
1 parent e7fc0ed commit 88a263a

3 files changed, +76 −76 lines changed


test/dtypes/test_nf4.py

Lines changed: 26 additions & 26 deletions
@@ -157,10 +157,10 @@ def test_nf4_bnb_linear(self, dtype: torch.dtype):
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_load_from_state_dicts(self, dtype: torch.dtype):
         """Tests loading to and from different module state dicts"""
-        inpt_tensor = torch.rand(64, device='cuda', dtype=dtype)
-        base_mod = self.TestMod(inpt_tensor, 32, 2)
+        input_tensor = torch.rand(64, device='cuda', dtype=dtype)
+        base_mod = self.TestMod(input_tensor, 32, 2)
 
-        dummy_dict = {"param": inpt_tensor}
+        dummy_dict = {"param": input_tensor}
         base_mod.load_state_dict(dummy_dict)
 
         assert base_mod.param.block_size == 32
@@ -170,12 +170,12 @@ def test_load_from_state_dicts(self, dtype: torch.dtype):
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_load_from_nf4_same_meta(self, dtype: torch.dtype):
         """Tests loading to and from different module state dicts"""
-        inpt_tensor = torch.rand(64, device='cuda', dtype=dtype)
-        base_mod = self.TestMod(inpt_tensor, 32, 2)
+        input_tensor = torch.rand(64, device='cuda', dtype=dtype)
+        base_mod = self.TestMod(input_tensor, 32, 2)
         state_dict = base_mod.state_dict()
         saved_state_dict = self.save_state_dict_to_buffer(state_dict)
 
-        other_mod = self.TestMod(inpt_tensor, 32, 2)
+        other_mod = self.TestMod(input_tensor, 32, 2)
         other_mod.load_state_dict(torch.load(saved_state_dict))
         assert other_mod.param.block_size == 32
         assert other_mod.param.scaler_block_size == 2
@@ -184,50 +184,50 @@ def test_load_from_nf4_same_meta(self, dtype: torch.dtype):
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_load_from_nf4_diff_meta(self, dtype: torch.dtype):
         """Tests loading to and from different module state dicts"""
-        inpt_tensor = torch.rand(128, device='cuda', dtype=dtype)
-        base_mod = self.TestMod(inpt_tensor, 32, 2)
+        input_tensor = torch.rand(128, device='cuda', dtype=dtype)
+        base_mod = self.TestMod(input_tensor, 32, 2)
         state_dict = base_mod.state_dict()
         saved_state_dict = self.save_state_dict_to_buffer(state_dict)
 
-        other_mod = self.TestMod(inpt_tensor, 64, 1)
+        other_mod = self.TestMod(input_tensor, 64, 1)
         other_mod.load_state_dict(torch.load(saved_state_dict))
         assert other_mod.param.block_size == 64
         assert other_mod.param.scaler_block_size == 1
 
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_to_copy(self, dtype: torch.dtype):
-        inpt_tensor = torch.rand(128, device='cpu')
-        inpt_tensor_nf4 = to_nf4(inpt_tensor, 32, 2)
-        nf4_to_dtype = inpt_tensor_nf4.to(dtype)
-        torch.testing.assert_allclose(inpt_tensor, nf4_to_dtype, atol=0.13, rtol=0.13)
+        input_tensor = torch.rand(128, device='cpu')
+        input_tensor_nf4 = to_nf4(input_tensor, 32, 2)
+        nf4_to_dtype = input_tensor_nf4.to(dtype)
+        torch.testing.assert_allclose(input_tensor, nf4_to_dtype, atol=0.13, rtol=0.13)
 
         if torch.cuda.is_available():
-            inpt_tensor = torch.rand(128, device='cuda')
-            inpt_tensor_nf4 = to_nf4(inpt_tensor, 32, 2)
-            nf4_to_dtype = inpt_tensor_nf4.to(dtype)
-            torch.testing.assert_allclose(inpt_tensor, nf4_to_dtype, atol=0.13, rtol=0.13)
+            input_tensor = torch.rand(128, device='cuda')
+            input_tensor_nf4 = to_nf4(input_tensor, 32, 2)
+            nf4_to_dtype = input_tensor_nf4.to(dtype)
+            torch.testing.assert_allclose(input_tensor, nf4_to_dtype, atol=0.13, rtol=0.13)
 
     @unittest.skipIf(not torch.cuda.is_available(), "Need cuda for test")
     def test_to_copy_device(self):
-        inpt_tensor = torch.rand(128, device='cpu')
-        t = to_nf4(inpt_tensor, 32, 2)
+        input_tensor = torch.rand(128, device='cpu')
+        t = to_nf4(input_tensor, 32, 2)
         assert t.device == torch.device('cpu')
         z = t.cuda()
         assert z.device.type == "cuda"  # Because the device could be cuda:0
         x = z.cpu()
         assert x.device == torch.device('cpu')
 
-        inpt_tensor = torch.rand(128, device='cuda')
-        t = to_nf4(inpt_tensor, 32, 2)
+        input_tensor = torch.rand(128, device='cuda')
+        t = to_nf4(input_tensor, 32, 2)
         assert t.device.type == "cuda"
 
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
     def test_to_dtype(self, dtype: torch.dtype):
-        inpt_tensor = torch.rand(128, dtype=dtype)
-        inpt_tensor_nf4 = to_nf4(inpt_tensor, 32, 2)
-        assert type(inpt_tensor_nf4) != torch.Tensor
-        assert type(inpt_tensor_nf4.to(dtype)) == torch.Tensor
-        assert inpt_tensor_nf4.to(dtype).dtype == dtype
+        input_tensor = torch.rand(128, dtype=dtype)
+        input_tensor_nf4 = to_nf4(input_tensor, 32, 2)
+        assert type(input_tensor_nf4) != torch.Tensor
+        assert type(input_tensor_nf4.to(dtype)) == torch.Tensor
+        assert input_tensor_nf4.to(dtype).dtype == dtype
 
     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
     @parametrize("dtype", [torch.bfloat16, torch.float16, torch.float32])
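
As context for the renamed tests, a minimal round-trip sketch of the API they exercise (assuming to_nf4 is importable from torchao.dtypes.nf4tensor, the module patched below; tolerances mirror test_to_copy):

    import torch
    from torchao.dtypes.nf4tensor import to_nf4  # assumed import path; to_nf4 lives in the patched module

    input_tensor = torch.rand(128, device="cpu")    # numel must be divisible by the block size
    input_tensor_nf4 = to_nf4(input_tensor, 32, 2)  # block_size=32, scaler_block_size=2, as in the tests
    restored = input_tensor_nf4.to(torch.float32)   # dequantize back to a plain torch.Tensor
    torch.testing.assert_allclose(input_tensor, restored, atol=0.13, rtol=0.13)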

torchao/dtypes/nf4tensor.py

Lines changed: 44 additions & 44 deletions
@@ -387,22 +387,22 @@ class SubclassTensorArgs:
     requires_grad: bool
 
 
-def get_block_absmax(inpt_tensor: torch.Tensor, block_size: int) -> torch.Tensor:
+def get_block_absmax(input_tensor: torch.Tensor, block_size: int) -> torch.Tensor:
     """Iterate through a flattened tensor getting the absmax scalers for each block
 
     Args:
-        inpt_tensor: Input tensor to get scalers for
+        input_tensor: Input tensor to get scalers for
         block_size: Block size for the scanning window
     Returns:
         torch.Tensor: Tensor of scalers for each block
     """
-    assert inpt_tensor.dim() == 1, "Input tensor must be flattened"
+    assert input_tensor.dim() == 1, "Input tensor must be flattened"
     assert (
-        inpt_tensor.numel() % block_size
-    ) == 0, f"Input tensor must be divisible by block size, got {inpt_tensor.numel()} and {block_size}"
+        input_tensor.numel() % block_size
+    ) == 0, f"Input tensor must be divisible by block size, got {input_tensor.numel()} and {block_size}"
 
-    n_blocks = inpt_tensor.numel() // block_size
-    blocks = inpt_tensor.view(n_blocks, block_size)
+    n_blocks = input_tensor.numel() // block_size
+    blocks = input_tensor.view(n_blocks, block_size)
     block_scalers = blocks.abs().max(dim=1).values
     return block_scalers
 
@@ -478,18 +478,18 @@ def __init__(
     @torch.no_grad()
     def from_tensor(
         cls,
-        inpt_tensor: torch.Tensor,
+        input_tensor: torch.Tensor,
         block_size: int,
         scaler_block_size: int,
     ):
-        assert inpt_tensor.dim() <= 2, f"expect input tensor dim <= 2 but got dim = {inpt_tensor.dim()}"
+        assert input_tensor.dim() <= 2, f"expect input tensor dim <= 2 but got dim = {input_tensor.dim()}"
         assert (
-            inpt_tensor.numel() % block_size == 0
-        ), f"Input tensor must be divisible by block size, got {inpt_tensor.numel()} and {block_size}"
-        assert inpt_tensor.is_contiguous, "Input tensor must be contiguous!"
+            input_tensor.numel() % block_size == 0
+        ), f"Input tensor must be divisible by block size, got {input_tensor.numel()} and {block_size}"
+        assert input_tensor.is_contiguous, "Input tensor must be contiguous!"
         # I think I want do this
-        # assert not inpt_tensor.requires_grad, "Input tensor must not require grad"
-        device = inpt_tensor.device
+        # assert not input_tensor.requires_grad, "Input tensor must not require grad"
+        device = input_tensor.device
         # Cache the tensor on the class def
         nf4 = torch.tensor(
             [
@@ -511,27 +511,27 @@ def from_tensor(
                 1.0000,
             ],
             device=device,
-            dtype=inpt_tensor.dtype,
+            dtype=input_tensor.dtype,
         )
-        n_blocks = inpt_tensor.numel() // block_size
+        n_blocks = input_tensor.numel() // block_size
         # Double quantization
         (
             quantized_scalers,
             quantization_factor,
             scaler_mean,
         ) = cls.double_quantize_scalers(
-            inpt_tensor.flatten(), block_size, scaler_block_size
+            input_tensor.flatten(), block_size, scaler_block_size
         )
         quantized_data = cls.convert_to_norm_float_weight(
-            inpt_tensor, n_blocks, block_size, nf4
+            input_tensor, n_blocks, block_size, nf4
         )
         tensor_meta = SubclassTensorArgs(
-            inpt_tensor.size(),
-            inpt_tensor.stride(),
-            inpt_tensor.storage_offset(),
-            inpt_tensor.dtype,
-            inpt_tensor.device,
-            inpt_tensor.requires_grad,
+            input_tensor.size(),
+            input_tensor.stride(),
+            input_tensor.storage_offset(),
+            input_tensor.dtype,
+            input_tensor.device,
+            input_tensor.requires_grad,
         )
         return cls(
             tensor_meta,
@@ -547,7 +547,7 @@ def from_tensor(
 
     @staticmethod
     def double_quantize_scalers(
-        inpt_tensor: torch.Tensor,
+        input_tensor: torch.Tensor,
         block_size: int,
         scaler_block_size: int,
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
@@ -557,22 +557,22 @@ def double_quantize_scalers(
         And then we calculate the absmax quantization factors for each block again. We then quantize the scalers to int8.
 
         Args:
-            inpt_tensor: Input tensor to convert to QLoRA format, typically a weight tensor
+            input_tensor: Input tensor to convert to QLoRA format, typically a weight tensor
 
         Returns:
             torch.Tensor: Tensor of per_block quantization factors stored in int8 format
                 size: (n_blocks)
             torch.Tensor: Tensor of per_scaler_block quantization factors stored in int16 format
                 size: (n_scaler_blocks)
         """
-        assert inpt_tensor.dim() == 1, "Input tensor must be flattened"
+        assert input_tensor.dim() == 1, "Input tensor must be flattened"
         assert (
-            inpt_tensor.numel() % scaler_block_size
-        ) == 0, f"Input tensor must be divisible by block size, got {inpt_tensor.numel()} and {scaler_block_size}"
+            input_tensor.numel() % scaler_block_size
+        ) == 0, f"Input tensor must be divisible by block size, got {input_tensor.numel()} and {scaler_block_size}"
 
         # First round of quantization
-        # Produces: A tensor of size (n_blocks) of inpt_tensor.dtype
-        scalers_1 = get_block_absmax(inpt_tensor, block_size)
+        # Produces: A tensor of size (n_blocks) of input_tensor.dtype
+        scalers_1 = get_block_absmax(input_tensor, block_size)
         scalers_1_mean = scalers_1.mean()
         scalers_1 = scalers_1 - scalers_1_mean
         # Second round of quantization
@@ -607,52 +607,52 @@ def double_quantize_scalers(
 
     def dequantize_scalers(
         self,
-        inpt_tensor: torch.Tensor,
+        input_tensor: torch.Tensor,
         quantization_factor: torch.Tensor,
         scaler_block_size: int,
     ) -> torch.Tensor:
         """Used to unpack the double quantized scalers
 
        Args;
-            inpt_tensor: Input tensor to convert to QLoRA format this is the quantized scalers in int8 format
+            input_tensor: Input tensor to convert to QLoRA format this is the quantized scalers in int8 format
            quantization_factor: Tensor of per_scaler_block quantization factors stored in inpt_weight.dtype
                size: (n_scaler_blocks)
            scaler_block_size: Scaler block size to use for double quantization.

        """
-        assert inpt_tensor.dim() == 1, "Input tensor must be flattened"
+        assert input_tensor.dim() == 1, "Input tensor must be flattened"
         assert (
-            inpt_tensor.numel() % scaler_block_size
-        ) == 0, f"Input tensor must be divisible by block size, got {inpt_tensor.numel()} and {scaler_block_size}"
-        n_scaler_blocks = inpt_tensor.numel() // scaler_block_size
-        inpt_tensor = inpt_tensor.view(n_scaler_blocks, scaler_block_size)
-        dequantized = (inpt_tensor / quantization_factor.unsqueeze(-1)).flatten().to(
+            input_tensor.numel() % scaler_block_size
+        ) == 0, f"Input tensor must be divisible by block size, got {input_tensor.numel()} and {scaler_block_size}"
+        n_scaler_blocks = input_tensor.numel() // scaler_block_size
+        input_tensor = input_tensor.view(n_scaler_blocks, scaler_block_size)
+        dequantized = (input_tensor / quantization_factor.unsqueeze(-1)).flatten().to(
             self.dtype
         ) + self.scaler_mean
         return dequantized
 
     @staticmethod
     def convert_to_norm_float_weight(
-        inpt_tensor: torch.Tensor, n_blocks: int, block_size: int, nf4: torch.Tensor
+        input_tensor: torch.Tensor, n_blocks: int, block_size: int, nf4: torch.Tensor
     ) -> torch.Tensor:
         """Convert a tensor to the normalized float weight format"""
-        flattened_tensor = inpt_tensor.flatten()
+        flattened_tensor = input_tensor.flatten()
         # Since we are using uint8 we will encode 2 entries per byte
-        numel = inpt_tensor.numel()
+        numel = input_tensor.numel()
         assert (
             numel % 2 == 0
         ), "Number of elements must be even just to not have to think about the end"
         # Reshape the flattened tensor into blocks of size self.block_size
         blocks = flattened_tensor.view(n_blocks, block_size)
 
         # Scale the blocks
-        scalers = get_block_absmax(inpt_tensor.flatten(), block_size)
+        scalers = get_block_absmax(input_tensor.flatten(), block_size)
         scales = scalers.unsqueeze(-1).expand(n_blocks, block_size)
         scaled_blocks = blocks / scales
 
         # Returns a flattened tensor with each element quantized to nf4 index
         # See Note: Quantize in Chunks
-        quantized_blocks = torch.empty(numel, dtype=torch.uint8, device=inpt_tensor.device)
+        quantized_blocks = torch.empty(numel, dtype=torch.uint8, device=input_tensor.device)
         flattened = scaled_blocks.flatten()
         for chunk_num in range(math.ceil(numel / CHUNK_SIZE)):
             start = chunk_num * CHUNK_SIZE
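
The hunks above only rename a variable, but the per-block absmax scaling they touch in get_block_absmax is easy to see in isolation. A standalone sketch (illustrative only; block_absmax is a hypothetical name, the real function is get_block_absmax):

    import torch

    def block_absmax(flat: torch.Tensor, block_size: int) -> torch.Tensor:
        # One absolute-max scaler per contiguous block, mirroring get_block_absmax above.
        assert flat.dim() == 1 and flat.numel() % block_size == 0
        return flat.view(-1, block_size).abs().max(dim=1).values

    scalers = block_absmax(torch.randn(128), 32)  # shape (4,): one scaler per 32-element block

Per its docstring, double_quantize_scalers then centers these scalers on their mean and quantizes them again per scaler block, which is why both block_size and scaler_block_size appear in the signatures above.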

torchao/float8/inference.py

Lines changed: 6 additions & 6 deletions
@@ -174,15 +174,15 @@ def from_float(
 
 
 
 def cast_to_float8_e4m3_inference(
-    inpt_tensor: torch.Tensor,
+    input_tensor: torch.Tensor,
     linear_mm_config: LinearMMConfig,
     reduce_amax: bool = False,
     static_quantization_scale: Optional[torch.Tensor] = None,
 ) -> Float8Tensor:
     """Casts an input tensor to the Float8 (e4m3fn*)
 
     Args:
-        inpt_tensor: The input tensor to be cast.
+        input_tensor: The input tensor to be cast.
         linear_mm_config: Configuration settings for the matrix multiplication
         reduce_amax: Whether to reduce the amax (absolute maximum) among the local distributed group.
         static_quantization_scale: Optional tensor specifying the scale for activation. Default is None.
@@ -193,15 +193,15 @@ def cast_to_float8_e4m3_inference(
     Note:
         If the input tensor is already in Float8 format, it is returned as is without re-casting.
     """
-    if tensor_already_casted_to_fp8(inpt_tensor):
-        return inpt_tensor
+    if tensor_already_casted_to_fp8(input_tensor):
+        return input_tensor
     scale = (
         static_quantization_scale
         if static_quantization_scale is not None
-        else tensor_to_scale(inpt_tensor, e4m3_dtype, reduce_amax)
+        else tensor_to_scale(input_tensor, e4m3_dtype, reduce_amax)
     )
     return hp_tensor_and_scale_to_float8(
-        inpt_tensor,
+        input_tensor,
         scale,
         e4m3_dtype,
         linear_mm_config,
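
Stripped of torchao internals, the renamed function's control flow is scale selection followed by a cast. A rough standalone sketch of the scale choice (choose_scale and the amax formula are illustrative assumptions, not the torchao implementation):

    from typing import Optional
    import torch

    def choose_scale(input_tensor: torch.Tensor,
                     static_quantization_scale: Optional[torch.Tensor] = None) -> torch.Tensor:
        # A provided static scale wins; otherwise derive a dynamic scale from the tensor's amax,
        # analogous to the tensor_to_scale(...) branch in cast_to_float8_e4m3_inference.
        if static_quantization_scale is not None:
            return static_quantization_scale
        amax = input_tensor.abs().max().float()
        return torch.finfo(torch.float8_e4m3fn).max / torch.clamp(amax, min=1e-12)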
