Skip to content

Commit 01f16c4

Browse files
authored
support habana fp8 UT test in CI (#1909)
Signed-off-by: chensuyue <[email protected]>
1 parent 28578b9 commit 01f16c4

File tree

10 files changed

+252
-17
lines changed

10 files changed

+252
-17
lines changed

.azure-pipelines/scripts/install_nc.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22

33
echo -e "\n Install Neural Compressor ... "
44
cd /neural-compressor
5-
if [[ $1 = *"3x_pt" ]]; then
5+
if [[ $1 = *"3x_pt"* ]]; then
66
python -m pip install --no-cache-dir -r requirements_pt.txt
77
python setup.py pt bdist_wheel
8-
pip install dist/neural_compressor*.whl --force-reinstall
8+
pip install --no-deps dist/neural_compressor*.whl --force-reinstall
99
elif [[ $1 = *"3x_tf"* ]]; then
1010
python -m pip install --no-cache-dir -r requirements_tf.txt
1111
python setup.py tf bdist_wheel

.azure-pipelines/scripts/ut/3x/collect_log_3x.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor
2525
git fetch
2626
git checkout master
2727
rm -rf build dist *egg-info
28-
echo y | pip uninstall neural_compressor_${1}
28+
binary_index="${1%_fp8}"
29+
echo y | pip uninstall neural_compressor_${binary_index}
2930
cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1}
3031

3132
coverage erase
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Coverage settings for the 3.x PyTorch Habana FP8 unit tests.

[run]
# Record branch coverage in addition to line coverage.
branch = True

[report]
# Only files matching these patterns are included in the report.
include =
    */neural_compressor/torch/algorithms/habana_fp8/*
    */neural_compressor/torch/amp/*
# Lines matching any of these regular expressions are left out of the report.
exclude_lines =
    pragma: no cover
    raise NotImplementedError
    raise TypeError
    if self.device == "gpu":
    if device == "gpu":
    except ImportError:
    except Exception as e:

.azure-pipelines/scripts/ut/3x/run_3x_pt.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__
1515
cd /neural-compressor/test/3x || exit 1
1616
rm -rf tensorflow
1717
rm -rf onnxrt
18+
rm -rf torch/algorithms/fp8_quant
1819

1920
LOG_DIR=/neural-compressor/log_dir
2021
mkdir -p ${LOG_DIR}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/bash
# Run the 3.x PyTorch Habana FP8 unit tests and stage the pytest log, the HTML
# report and the coverage data file under ${LOG_DIR}.

# Abort early when the package cannot be imported: inc_path below would be
# empty and the coverage run would be meaningless.
python -c "import neural_compressor as nc" || exit 1
test_case="run 3x Torch Habana FP8"
echo "${test_case}"

# install requirements
echo "set up UT env..."
# Drop intel_extension_for_pytorch from the requirements before installing them.
sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
pip install -r /neural-compressor/test/3x/torch/requirements.txt
# NOTE(review): the scraped page showed this URL as "HabanaAI/[email protected]"
# (e-mail obfuscation); "DeepSpeed.git@1.16.0" is a reconstruction - confirm the tag.
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0
pip install pytest-cov
pip install pytest-html
pip list

export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
cd /neural-compressor/test/3x || exit 1

LOG_DIR=/neural-compressor/log_dir
mkdir -p "${LOG_DIR}"
ut_log_name="${LOG_DIR}/ut_3x_pt_fp8.log"
pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a "${ut_log_name}"

cp report.html "${LOG_DIR}/"

# pytest's exit status is hidden behind the pipe to tee, so the verdict is read
# from the log: any FAILURES/ERRORS banner, or no " passed" summary at all
# (including an unreadable log), fails the job.
if grep -q '== FAILURES ==' "${ut_log_name}" || grep -q '== ERRORS ==' "${ut_log_name}" || ! grep -q ' passed' "${ut_log_name}"; then
    echo "Find errors in pytest case, please check the output..."
    echo "Please search for '== FAILURES ==' or '== ERRORS =='"
    exit 1
fi

# if ut pass, collect the coverage file into artifacts
cp .coverage "${LOG_DIR}/.coverage"

echo "UT finished successfully! "

.azure-pipelines/template/docker-template.yml

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ parameters:
1616
- name: repo
1717
type: string
1818
default: "https://github.com/intel/neural-compressor"
19+
- name: imageSource
20+
type: string
21+
default: "build"
1922

2023
steps:
2124
- task: Bash@3
@@ -24,7 +27,7 @@ steps:
2427
script: |
2528
docker ps -a
2629
if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then
27-
docker start $(docker ps -aq)
30+
docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}")
2831
echo "remove left files through container ..."
2932
docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true"
3033
fi
@@ -57,19 +60,25 @@ steps:
5760
git checkout master
5861
displayName: "Checkout out master"
5962
60-
- script: |
61-
if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
62-
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
63-
fi
64-
docker images | grep -i ${{ parameters.repoName }}
65-
if [[ $? -ne 0 ]]; then
66-
echo "NO Such Repo"
67-
exit 1
68-
fi
69-
displayName: "Build develop docker image"
63+
- ${{ if eq(parameters.imageSource, 'build') }}:
64+
- script: |
65+
if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
66+
docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
67+
fi
68+
docker images | grep -i ${{ parameters.repoName }}
69+
if [[ $? -ne 0 ]]; then
70+
echo "NO Such Repo"
71+
exit 1
72+
fi
73+
displayName: "Build develop docker image"
74+
75+
- ${{ if eq(parameters.imageSource, 'pull') }}:
76+
- script: |
77+
docker pull vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest
78+
displayName: "Pull habana docker image"
7079
7180
- script: |
72-
docker stop $(docker ps -aq)
81+
docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}")
7382
docker rm -vf ${{ parameters.containerName }} || true
7483
env | sort
7584
displayName: "Clean docker container"
@@ -79,8 +88,15 @@ steps:
7988
inputs:
8089
targetType: "inline"
8190
script: |
82-
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
83-
-v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }}
91+
if [[ "${{ parameters.imageSource }}" == "build" ]]; then
92+
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
93+
-v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \
94+
${{ parameters.repoName }}:${{ parameters.repoTag }}
95+
else
96+
docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
97+
--runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
98+
-v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest
99+
fi
84100
echo "Show the container list after docker run ... "
85101
docker ps -a
86102
displayName: "Docker run - ${{ parameters.containerName }} Container"

.azure-pipelines/template/ut-template.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ parameters:
1717
- name: utContainerName
1818
type: string
1919
default: "utTest"
20+
- name: imageSource
21+
type: string
22+
default: "build"
2023

2124
steps:
2225
- template: docker-template.yml
@@ -27,6 +30,7 @@ steps:
2730
dockerFileName: "Dockerfile"
2831
containerName: ${{ parameters.utContainerName }}
2932
repo: ${{ parameters.repo }}
33+
imageSource: ${{ parameters.imageSource }}
3034

3135
- script: |
3236
docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \

.azure-pipelines/ut-3x-pt-fp8.yml

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# PR pipeline for the 3.x PyTorch Habana FP8 unit tests.
# Two UT stages run on the GAUDI pool (one with commonDockerConfig, one with
# gitCloneDockerConfig as the baseline); a final stage on a hosted agent
# collects their coverage data and publishes the report.
trigger: none

pr:
  autoCancel: true
  drafts: false
  branches:
    include:
      - master
  paths:
    include:
      - neural_compressor/common
      - setup.py
      - requirements_pt.txt
      - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh

pool: GAUDI

variables:
  IMAGE_NAME: "neural-compressor"
  IMAGE_TAG: "py310"
  UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
  DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
  ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8"
  REPO: $(Build.Repository.Uri)

stages:
  - stage: Torch_habana
    displayName: Torch 3x Habana FP8
    dependsOn: []
    jobs:
      - job:
        displayName: Torch 3x Habana FP8
        steps:
          - template: template/ut-template.yml
            parameters:
              # "pull" makes the docker template pull the Habana image instead of building one.
              imageSource: "pull"
              dockerConfigName: "commonDockerConfig"
              utScriptFileName: "3x/run_3x_pt_fp8"
              uploadPath: $(UPLOAD_PATH)
              utArtifact: "ut_3x_pt_fp8"

  - stage: Torch_habana_baseline
    displayName: Torch 3x Habana FP8 baseline
    dependsOn: []
    jobs:
      - job:
        displayName: Torch 3x Habana FP8 baseline
        steps:
          - template: template/ut-template.yml
            parameters:
              imageSource: "pull"
              dockerConfigName: "gitCloneDockerConfig"
              utScriptFileName: "3x/run_3x_pt_fp8"
              uploadPath: $(UPLOAD_PATH)
              # NOTE(review): same utArtifact value as the Torch_habana stage. If
              # ut-template.yml derives published artifact names from it, the two
              # stages would collide - confirm against the template.
              utArtifact: "ut_3x_pt_fp8"

  - stage: Coverage
    displayName: "Coverage Compare"
    # Runs on a hosted agent rather than the GAUDI pool.
    pool:
      vmImage: "ubuntu-latest"
    dependsOn: [Torch_habana, Torch_habana_baseline]
    jobs:
      - job: CollectDatafiles
        steps:
          - script: |
              if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
                docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
              fi
              docker images | grep -i ${IMAGE_NAME}
              if [[ $? -ne 0 ]]; then
                echo "NO Such Repo"
                exit 1
              fi
            displayName: "Build develop docker image"

          # No artifact name is given; the pattern selects the .coverage files to fetch.
          - task: DownloadPipelineArtifact@2
            inputs:
              artifact:
              patterns: '*_coverage/.coverage'
              path: $(DOWNLOAD_PATH)

          - script: |
              echo "--- create container ---"
              docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
              echo "--- docker ps ---"
              docker ps
              echo "--- collect logs ---"
              docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
                && bash install_nc.sh 3x_pt_fp8 \
                && bash ut/3x/collect_log_3x.sh 3x_pt_fp8"
            displayName: "Collect UT Coverage"

          # Publish the report even when the coverage step failed.
          - task: PublishPipelineArtifact@1
            condition: succeededOrFailed()
            inputs:
              targetPath: $(UPLOAD_PATH)
              artifact: $(ARTIFACT_NAME)
              publishLocation: "pipeline"

          # Always empty the mounted source directory from inside the container.
          - task: Bash@3
            condition: always()
            inputs:
              targetType: "inline"
              script: |
                docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
            displayName: "Docker clean up"

neural_compressor/torch/utils/environ.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def is_package_available(package_name):
4646
## check hpex
4747
if is_package_available("habana_frameworks"):
4848
_hpex_available = True
49+
import habana_frameworks.torch.hpex # pylint: disable=E0401
4950
else:
5051
_hpex_available = False
5152

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import os
2+
import sys
3+
import time
4+
5+
import habana_frameworks.torch.core as htcore
6+
import torch
7+
import torch.nn as nn
8+
import torch.nn.functional as F
9+
from torch.utils.data import DataLoader
10+
from torchvision import datasets, transforms
11+
12+
13+
class Net(nn.Module):
    """Three-layer fully connected classifier for 28x28 inputs.

    Layers are 784 -> 256 -> 64 -> 10 with ReLU after the first two; the
    attribute names ``fc1``/``fc2``/``fc3`` are the keys of the state dict.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return log-softmax scores with one row of 10 values per input image.

        Args:
            x: Tensor whose elements can be regrouped into rows of 28 * 28
                values, e.g. a batch shaped (N, 1, 28, 28).
        """
        # reshape() rather than view(): view() raises when the memory layout
        # cannot be flattened without a copy (e.g. a transposed batch).
        out = x.reshape(-1, 28 * 28)
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        out = self.fc3(out)
        return F.log_softmax(out, dim=1)
27+
28+
29+
def test_hpu():
    """Check that the pretrained MNIST classifier exceeds 90% accuracy on HPU.

    Downloads the reference checkpoint and the MNIST test split, runs
    inference on the "hpu" device in batches of 32, and asserts on the
    fraction of correctly classified samples.
    """
    model = Net()
    model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth"
    model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth"
    # Fail here with a clear message instead of later inside torch.load()
    # on a bad checkpoint file.
    status = os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path))
    assert status == 0, "failed to download {}".format(model_link)
    checkpoint = torch.load(model_path)
    model.load_state_dict(checkpoint)

    model = model.eval()

    model = model.to("hpu")

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

    data_path = "./data"
    test_kwargs = {"batch_size": 32}
    dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(dataset1, **test_kwargs)

    correct = 0
    # Inference only: no gradients are needed.
    with torch.no_grad():
        for data, label in test_loader:
            data = data.to("hpu")
            output = model(data)
            htcore.mark_step()
            # Labels stay on the host, so bring the predictions back before comparing.
            predicted = output.max(1)[1].cpu()
            correct += predicted.eq(label).sum().item()

    # Divide by the real sample count: len(test_loader) * 32 over-counts
    # whenever the last batch holds fewer than 32 samples.
    accuracy = 100.0 * correct / len(dataset1)
    assert accuracy > 90

0 commit comments

Comments
 (0)