Implement QAT for APoT (pytorch#83282)

asl3 · pytorchmergebot · commit 0e0f8fd03e09 · 2022-08-12T04:00:06.000Z
### Summary: This PR implements QAT for APoT FakeQuant. It runs QAT with FX graph mode quantized models (ResNet-18 pre-trained model, full ImageNet dataset) to compare accuracy metrics for different qconfig settings of uniform vs. APoT quantized activation and weight. It also refactors the APoT PTQ module `apot_fx_graph_mode_ptq.py` (previously `fx_graph_mode_apot.py`) so that helper functions shared between PTQ and QAT are in a separate file `quantization_util.py`. Model #2 (uniformly quantized activation, APoT quantized weight) shows accuracy comparable to model #1 (uniformly quantized activation, uniformly quantized weight) for 8-bit and a significant accuracy improvement for 4-bit (see "Accuracy Stats" section below). ### Test Plan: Run QAT models with: `python test/quantization/core/experimental/apot_fx_graph_mode_qat.py` Run PTQ models with: `python test/quantization/core/experimental/apot_fx_graph_mode_ptq.py` ### Accuracy Stats 8-bit (Uniform int8, APoT b = 8, k = 2) Model #1: Uniform activation, uniform weight (FX Graph Mode quantized) Evaluation accuracy on test dataset: 69.67% (Top-1), 89.04% (Top-5) Model #2: Uniform activation, APoT weight (FX Graph Mode quantized) Evaluation accuracy on test dataset: 69.72% (Top-1), 89.06% (Top-5) 4-bit (Uniform int4, APoT b = 4, k = 2) Model #1: Uniform activation, uniform weight (FX Graph Mode quantized) Evaluation accuracy on test dataset: 46.85% (Top-1), 72.85% (Top-5) Model #2: Uniform activation, APoT weight (FX Graph Mode quantized) Evaluation accuracy on test dataset: 66.45% (Top-1), 86.23% (Top-5) Pull Request resolved: pytorch#83282 Approved by: https://github.com/jerryzh168
diff --git a/test/quantization/core/experimental/apot_fx_graph_mode_ptq.py b/test/quantization/core/experimental/apot_fx_graph_mode_ptq.py
@@ -0,0 +1,131 @@
+import torch
+import torch.nn as nn
+import torch.quantization
+from torchvision.models.quantization.resnet import resnet18
+from torch.ao.quantization.experimental.quantization_helper import (
+    evaluate,
+    prepare_data_loaders
+)
+
+# validation dataset: full ImageNet dataset
+# NOTE(review): the path starts with '~'; whether it gets expanded depends on
+# prepare_data_loaders, which is not shown here -- confirm.
+data_path = '~/my_imagenet/'
+
+# Shared state for the rest of the script: the two data loaders, the loss that
+# is passed to evaluate(), and the pretrained float ResNet-18 in eval mode.
+data_loader, data_loader_test = prepare_data_loaders(data_path)
+criterion = nn.CrossEntropyLoss()
+float_model = resnet18(pretrained=True)
+float_model.eval()
+
+# deepcopy the model since we need to keep the original model around
+# NOTE(review): model_to_quantize is not referenced again in this script;
+# prepare_ptq_linear deep-copies float_model itself.
+import copy
+model_to_quantize = copy.deepcopy(float_model)
+
+model_to_quantize.eval()
+
+"""
+Prepare models
+"""
+
+# Note that this is temporary; we'll expose these functions to torch.quantization after the official release
+from torch.quantization.quantize_fx import prepare_qat_fx
+
+def calibrate(model, data_loader):
+    """Feed every batch of ``data_loader`` through ``model`` without gradients.
+
+    The model is switched to eval mode first. The forward outputs are
+    discarded; the passes are run only for their effect on the model
+    (the caller describes this as "run calibration on sample data").
+
+    Args:
+        model: module to run; must provide ``eval()`` and be callable.
+        data_loader: iterable yielding ``(image, target)`` pairs. Only the
+            image part is used; the target is ignored.
+
+    Returns:
+        None.
+    """
+    model.eval()
+    with torch.no_grad():
+        for batch_images, _unused_target in data_loader:
+            model(batch_images)
+
+from torch.ao.quantization.experimental.qconfig import (
+    uniform_qconfig_8bit,
+    apot_weights_qconfig_8bit,
+    apot_qconfig_8bit,
+    uniform_qconfig_4bit,
+    apot_weights_qconfig_4bit,
+    apot_qconfig_4bit
+)
+
+"""
+Prepare full precision model
+"""
+# Model #0 is the unquantized baseline. full_precision_model is an alias of
+# float_model (same object, no copy).
+full_precision_model = float_model
+
+# evaluate() returns two result objects whose .avg fields are printed
+# (presumably Top-1 / Top-5 accuracy, going by the variable names).
+top1, top5 = evaluate(full_precision_model, criterion, data_loader_test)
+print("Model #0 Evaluation accuracy on test dataset: %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model PTQ for specified qconfig for torch.nn.Linear
+"""
+def prepare_ptq_linear(qconfig):
+    """Prepare and calibrate a copy of ``float_model`` with ``qconfig`` on Linear layers.
+
+    The given qconfig is attached to ``torch.nn.Linear`` by object type, a deep
+    copy of the module-level ``float_model`` is prepared with it, and the
+    prepared copy is calibrated on ``data_loader_test``. ``float_model`` itself
+    is left untouched.
+
+    NOTE(review): preparation goes through ``prepare_qat_fx`` even though this
+    is the PTQ path -- presumably so fake-quantize modules are attached;
+    confirm this is intended.
+
+    Args:
+        qconfig: quantization configuration applied to ``torch.nn.Linear``.
+
+    Returns:
+        The prepared and calibrated model. It is not converted here.
+    """
+    linear_only_qconfig = {"object_type": [(torch.nn.Linear, qconfig)]}
+    model_copy = copy.deepcopy(float_model)
+    # fuse modules and insert observers
+    observed_model = prepare_qat_fx(model_copy, linear_only_qconfig)
+    # run calibration on sample data
+    calibrate(observed_model, data_loader_test)
+    return observed_model
+
+"""
+Prepare model with uniform activation, uniform weight
+b=8, k=2
+"""
+
+# convert_fx turns a prepared, calibrated model into a quantized model. It is
+# imported here, next to its only uses, in keeping with this script's habit of
+# importing names where they are first needed.
+from torch.quantization.quantize_fx import convert_fx
+
+prepared_model = prepare_ptq_linear(uniform_qconfig_8bit)
+quantized_model = convert_fx(prepared_model)  # convert the calibrated model to a quantized model
+
+top1, top5 = evaluate(quantized_model, criterion, data_loader_test)
+print("Model #1 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, uniform weight
+b=4, k=2
+"""
+
+prepared_model = prepare_ptq_linear(uniform_qconfig_4bit)
+quantized_model = convert_fx(prepared_model)  # convert the calibrated model to a quantized model
+
+# Evaluate the 4-bit model that was just converted above.
+top1, top5 = evaluate(quantized_model, criterion, data_loader_test)
+print("Model #1 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, APoT weight
+(b=8, k=2)
+"""
+
+# Unlike the uniform runs, the APoT models below are evaluated exactly as
+# returned by prepare_ptq_linear: convert_fx is not applied to them.
+prepared_model = prepare_ptq_linear(apot_weights_qconfig_8bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #2 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, APoT weight
+(b=4, k=2)
+"""
+
+prepared_model = prepare_ptq_linear(apot_weights_qconfig_4bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #2 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+
+"""
+Prepare model with APoT activation and weight
+(b=8, k=2)
+"""
+
+# Model #3 uses the qconfigs named apot_qconfig_*, i.e. APoT for both
+# activation and weight according to the section headers.
+prepared_model = prepare_ptq_linear(apot_qconfig_8bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #3 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with APoT activation and weight
+(b=4, k=2)
+"""
+
+prepared_model = prepare_ptq_linear(apot_qconfig_4bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #3 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare eager mode quantized model
+"""
+# Reference point: torchvision's ResNet-18 requested with quantize=True. It does
+# not go through prepare_ptq_linear and is evaluated on the same test loader.
+eager_quantized_model = resnet18(pretrained=True, quantize=True).eval()
+top1, top5 = evaluate(eager_quantized_model, criterion, data_loader_test)
+print("Eager mode quantized model evaluation accuracy on test dataset: %2.2f, %2.2f" % (top1.avg, top5.avg))
diff --git a/test/quantization/core/experimental/apot_fx_graph_mode_qat.py b/test/quantization/core/experimental/apot_fx_graph_mode_qat.py
@@ -0,0 +1,94 @@
+import torch
+import torch.nn as nn
+from torch.ao.quantization.experimental.qconfig import (
+    uniform_qconfig_8bit,
+    apot_weights_qconfig_8bit,
+    apot_qconfig_8bit,
+    uniform_qconfig_4bit,
+    apot_weights_qconfig_4bit,
+    apot_qconfig_4bit
+)
+from torch.ao.quantization.experimental.quantization_helper import (
+    evaluate,
+    prepare_data_loaders,
+    training_loop
+)
+from torch.quantization.quantize_fx import prepare_fx, prepare_qat_fx
+from torchvision.models.quantization.resnet import resnet18
+
+# training and validation dataset: full ImageNet dataset
+# NOTE(review): the path starts with '~'; whether it gets expanded depends on
+# prepare_data_loaders, which is not shown here -- confirm.
+data_path = '~/my_imagenet/'
+
+# NOTE(review): these two batch sizes are not passed to prepare_data_loaders
+# and are not used anywhere else in this script -- confirm whether the helper
+# is expected to receive them.
+train_batch_size = 30
+eval_batch_size = 50
+
+# Shared state for the rest of the script: the train/test loaders, the loss
+# used for both training and evaluation, and the pretrained float ResNet-18.
+data_loader, data_loader_test = prepare_data_loaders(data_path)
+criterion = nn.CrossEntropyLoss()
+float_model = resnet18(pretrained=True)
+float_model.eval()
+
+# deepcopy the model since we need to keep the original model around
+# NOTE(review): model_to_quantize is not referenced again in this script;
+# prepare_qat_linear deep-copies float_model itself.
+import copy
+model_to_quantize = copy.deepcopy(float_model)
+
+model_to_quantize.eval()
+
+"""
+Prepare model QAT for specified qconfig for torch.nn.Linear
+"""
+def prepare_qat_linear(qconfig):
+    """Prepare a copy of ``float_model`` for QAT on Linear layers and train it.
+
+    The given qconfig is attached to ``torch.nn.Linear`` by object type, a deep
+    copy of the module-level ``float_model`` is prepared with it, the copy is
+    trained with ``training_loop`` on ``data_loader`` using ``criterion``, and
+    finally switched to eval mode. ``float_model`` itself is left untouched.
+
+    Args:
+        qconfig: quantization configuration applied to ``torch.nn.Linear``.
+
+    Returns:
+        The prepared, trained model in eval mode. It is not converted here.
+    """
+    qconfig_dict = {"object_type": [(torch.nn.Linear, qconfig)]}
+    # Use the QAT entry point of FX graph mode quantization (the same one the
+    # sibling PTQ script calls) rather than the PTQ entry point prepare_fx, so
+    # that the model is prepared for quantization-aware training.
+    # NOTE(review): the copy is still in eval mode at this point (float_model
+    # was put in eval mode); training_loop is presumably responsible for
+    # switching it to train mode -- confirm.
+    prepared_model = prepare_qat_fx(copy.deepcopy(float_model), qconfig_dict)  # fuse modules and insert observers
+    training_loop(prepared_model, criterion, data_loader)
+    prepared_model.eval()
+    return prepared_model
+
+"""
+Prepare model with uniform activation, uniform weight
+b=8, k=2
+"""
+
+# Every run below follows the same pattern: prepare_qat_linear trains a fresh
+# deep copy of float_model with the given qconfig, and the returned model is
+# evaluated directly on the test loader (no convert step is applied in this
+# script).
+prepared_model = prepare_qat_linear(uniform_qconfig_8bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #1 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, uniform weight
+b=4, k=2
+"""
+
+prepared_model = prepare_qat_linear(uniform_qconfig_4bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #1 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, APoT weight
+(b=8, k=2)
+"""
+
+prepared_model = prepare_qat_linear(apot_weights_qconfig_8bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #2 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with uniform activation, APoT weight
+(b=4, k=2)
+"""
+
+prepared_model = prepare_qat_linear(apot_weights_qconfig_4bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #2 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+
+"""
+Prepare model with APoT activation and weight
+(b=8, k=2)
+"""
+
+prepared_model = prepare_qat_linear(apot_qconfig_8bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #3 Evaluation accuracy on test dataset (b=8, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
+
+"""
+Prepare model with APoT activation and weight
+(b=4, k=2)
+"""
+
+prepared_model = prepare_qat_linear(apot_qconfig_4bit)
+
+top1, top5 = evaluate(prepared_model, criterion, data_loader_test)
+print("Model #3 Evaluation accuracy on test dataset (b=4, k=2): %2.2f, %2.2f" % (top1.avg, top5.avg))
diff --git a/test/quantization/core/experimental/quantization_util.py b/test/quantization/core/experimental/quantization_util.py