diff --git a/.circleci/config.yml b/.circleci/config.yml index 903e33d4b7..257ccfaf51 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -2,96 +2,681 @@ # See: https://circleci.com/docs/2.0/configuration-reference version: 2.1 -# Define a job to be invoked later in a workflow. -# See: https://circleci.com/docs/2.0/configuration-reference/#jobs -jobs: - build: - machine: - # Primary container image where all steps run. - # image: nvcr.io/nvidia/tensorrt:22.01-py3 # does not work with customized image - # https://circleci.com/docs/2.0/configuration-reference#available-linux-gpu-images - image: ubuntu-2004-cuda-11.4:202110-01 - resource_class: gpu.nvidia.large +commands: + install-bazel: + description: "Install bazel" + parameters: + platform: + type: string + default: "x86_64" + version: + type: string + default: "5.1.1" steps: - - checkout - run: - name: install cudnn + tensorrt + bazel + name: Install bazel + command: | + sudo wget -q https://github.com/bazelbuild/bazel/releases/download/<< parameters.version >>/bazel-<< parameters.version >>-linux-<< parameters.platform >> -O /usr/bin/bazel + sudo chmod a+x /usr/bin/bazel + + install-cuda: + description: "Install CUDA" + parameters: + os: + type: string + default: "ubuntu2004" + platform: + type: string + default: "x86_64" + cuda-pkg-name: + type: string + default: "cuda-toolkit-11-4" + steps: + - run: + name: Install CUDA command: | cd ~ - OS=ubuntu2004 - CUDNN_VERSION=8.2.1.*-1+cuda11.3 - TRT_VERSION=8.2.4-1+cuda11.4 - BAZEL_VERSION=5.1.1 - - wget https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin - sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/7fa2af80.pub + + wget https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/cuda-<< parameters.os >>.pin + sudo mv cuda-<< parameters.os >>.pin /etc/apt/preferences.d/cuda-repository-pin-600 + + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/3bf863cc.pub + sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/ /" + sudo apt-get update + + sudo apt-get install -y << parameters.cuda-pkg-name >> + - run: + when: on_fail + name: Dump apt sources + command: cat /etc/apt/sources.list + + create-env: + description: "Install dependencies for Torch-TensorRT" + parameters: + os: + type: string + default: "ubuntu2004" + platform: + type: string + default: "x86_64" + cudnn-version: + type: string + default: "8.2.1" + trt-version-short: + type: string + default: "8.2.4" + bazel-version: + type: string + default: "5.1.1" + bazel-platform: + type: string + default: "x86_64" + steps: + - run: + name: Install cudnn + tensorrt + command: | + cd ~ + + wget https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/cuda-<< parameters.os >>.pin + sudo mv cuda-<< parameters.os >>.pin /etc/apt/preferences.d/cuda-repository-pin-600 + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/7fa2af80.pub sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35 sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC - sudo add-apt-repository "deb 
https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/ /" + sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/ /" sudo apt-get update - sudo apt-get install libcudnn8=${CUDNN_VERSION} - sudo apt-get install libcudnn8-dev=${CUDNN_VERSION} + sudo apt-get install libcudnn8=<< parameters.cudnn-version >>* + sudo apt-get install libcudnn8-dev=<< parameters.cudnn-version >>* - sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/{OS}/x86_64/3bf863cc.pub - sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/ /" + sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/3bf863cc.pub + sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/<< parameters.os >>/<< parameters.platform >>/ /" sudo apt-get update - - sudo apt-get install libnvinfer8=${TRT_VERSION} libnvonnxparsers8=${TRT_VERSION} libnvparsers8=${TRT_VERSION} libnvinfer-plugin8=${TRT_VERSION} libnvinfer-dev=${TRT_VERSION} libnvonnxparsers-dev=${TRT_VERSION} libnvparsers-dev=${TRT_VERSION} libnvinfer-plugin-dev=${TRT_VERSION} python3-libnvinfer=${TRT_VERSION} - # check available version, apt list libnvinfer8 -a - sudo wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-x86_64 -O /usr/bin/bazel - sudo chmod a+x /usr/bin/bazel + sudo apt-get install libnvinfer8=<< parameters.trt-version-short >>* libnvinfer-plugin8=<< parameters.trt-version-short>>* libnvinfer-dev=<< parameters.trt-version-short>>* libnvinfer-plugin-dev=<< parameters.trt-version-short>>* + - install-bazel: + platform: << parameters.bazel-platform >> + version: << parameters.bazel-version >> + + create-py-env: + description: "Install python dependencies" + parameters: + trt-version-long: + type: string + default: "8.2.4.2" + steps: + - run: + name: Set up python environment + command: | + pip3 install --upgrade pip + pip3 install wheel setuptools + pip3 install nvidia-pyindex + pip3 install tabulate + pip3 install nvidia-tensorrt==<< parameters.trt-version-long >> + pip3 install pytest parameterized expecttest nox + # install torch_tensorrt + + install-torch-from-index: + description: "Install python dependencies" + parameters: + torch-build: + type: string + default: "1.11.0+cu113" + torch-build-index: + type: string + default: "https://download.pytorch.org/whl/cu113" + steps: - run: - name: set up python environment + name: Install Torch command: | - pip3 install nvidia-pyindex - pip3 install nvidia-tensorrt==8.2.4.2 - pip3 install --pre torch==1.13.0.dev20220621 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cu113 - pip3 install pytest parameterized expecttest - pip3 install tabulate - # install torch_tensorrt - mv WORKSPACE.ci WORKSPACE + pip3 install --upgrade pip + pip3 install --pre torch==<< parameters.torch-build >> torchvision torchaudio --extra-index-url << parameters.torch-build-index >> + + build-py: + description: "Build the torch-tensorrt python release (pre-cxx11-abi)" + parameters: + platform: + type: string + default: "x86_64" + steps: + - run: + name: Build torch-tensorrt python release (pre-cxx11-abi) + command: | + mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE cd py + python3 -m pip install wheel setuptools + python3 -m pip install pybind11==2.6.2 + 
python3 setup.py bdist_wheel python3 setup.py install + mkdir -p /tmp/dist/builds + cp dist/* /tmp/dist/builds + + build-py-cxx11-abi: + description: "Build the torch-tensorrt python release (cxx11-abi)" + parameters: + platform: + type: string + default: "x86_64" + steps: + - run: + name: Build torch-tensorrt python release + command: | + mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE + cd py + python3 -m pip install wheel setuptools + python3 -m pip install pybind11==2.6.2 + python3 setup.py bdist_wheel --use-cxx11-abi + python3 setup.py install --use-cxx11-abi + mkdir -p /tmp/dist/builds + cp dist/* /tmp/dist/builds + + build-py-fx-only: + description: "Build the torch-tensorrt python release with only the fx backend" + parameters: + platform: + type: string + default: "x86_64" + steps: + - run: + name: Build torch-tensorrt python release with only the fx backend + command: | + mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE + cd py + python3 -m pip install wheel setuptools + python3 -m pip install pybind11==2.6.2 + python3 setup.py bdist_wheel --fx-only + python3 setup.py install --fx-only + mkdir -p /tmp/dist/builds + cp dist/* /tmp/dist/builds + + dump-test-env: + description: "Dump the test env to console" + steps: + - run: + name: GPU Config + command: | + nvidia-smi + + - run: + name: Test torch + command: | + python3 -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.device_count())" - # install fx2trt - # cd py/torch_tensorrt/fx/setup - # python3 setup.py install - run: - name: run fx2trt tests + name: Get torch-tensorrt version information command: | + python3 -c "import torch_tensorrt; torch_tensorrt.dump_build_info()" + + pull-test-models: + description: "Pull the test model set" + steps: + - run: + name: Pull test models + environment: + USE_HOST_DEPS: "1" + command: | + cd tests/modules + pip3 install -r requirements.txt + python3 hub.py + cd ~/project + + test-ts-core: + description: "Test torchscript backend c++ api" + parameters: + platform: + type: string + default: "x86_64" + steps: + - pull-test-models + - run: mkdir -p /tmp/artifacts + - run: + name: Run core / C++ tests + environment: + LD_LIBRARY_PATH: "/home/circleci/project/bazel-project/external/libtorch_pre_cxx11_abi/lib/:/home/circleci/project/bazel-project/external/tensorrt/lib/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH" + command: | + set -e + mv toolchains/ci_workspaces/WORKSPACE.<< parameters.platform >> WORKSPACE + bazel query 'kind(cc_*, tests(//tests))' --noshow_progress >> /tmp/test_manifest.txt + circleci tests split < /tmp/test_manifest.txt > /tmp/node_test_manifest.txt + bazel test $(cat /tmp/node_test_manifest.txt) --test_arg=--gtest_output=xml:/tmp/artifacts/test_results/ --jobs 4 --config ci_testing --config pre_cxx11_abi --noshow_progress + - run: + name: Collect logs + when: on_fail + command: | + mkdir -p /tmp/testlogs + cp -r bazel-testlogs /tmp/testlogs + sudo apt install tree + tree . 
> /tmp/testlogs/dir_structure.txt + + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + + test-ts-py-api: + description: "Run L0 torch-tensorrt python tests" + steps: + - pull-test-models + - run: + name: Run L0 torch-tensorrt python tests + environment: + USE_HOST_DEPS: "1" + PYT_PATH: "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/" + LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu/:/usr/local/cuda/lib64/:$LD_LIBRARY_PATH" + command: | + set -e + mkdir -p /tmp/artifacts/test_results + cd tests/py + pip3 install -r requirements.txt + pytest --junitxml=/tmp/artifacts/test_results/api/api_test_results.xml api/ + pytest --junitxml=/tmp/artifacts/test_results/integrations/integrations_test_results.xml integrations/ + cd ~/project + + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + + + test-fx: + description: "Test the fx backend" + steps: + - run: + name: Run fx tests + command: | + mkdir -p /tmp/artifacts/test_results # one fix pending to enable below # cd py/torch_tensorrt/fx/test # pytest $(find . -name '*.py' | grep -v test_dispatch* | grep -v test_setitem*) - cd py/torch_tensorrt/fx/test pushd converters/acc_op - pytest + pytest --junitxml=/tmp/artifacts/test_results/fx/converters/acc_op/test_results.xml popd pushd passes - list_passes=$(ls | grep -v test_setitem*) - pytest $list_passes + list_passes=$(ls | grep -v test_setitem*) + pytest $list_passes --junitxml=/tmp/artifacts/test_results/fx/passes/test_results.xml popd pushd core - pytest + pytest --junitxml=/tmp/artifacts/test_results/fx/core/test_results.xml popd # pushd quant - # pytest + # pytest --junitxml=/tmp/artifacts/test_results/fx/quant/test_results.xml # popd pushd tools - pytest + pytest --junitxml=/tmp/artifacts/test_results/fx/tools/test_results.xml popd pushd trt_lower - pytest + pytest --junitxml=/tmp/artifacts/test_results/fx/trt_lower/test_results.xml popd pushd tracer - list_tracer=$(ls | grep -v test_dispatch_*) - pytest $list_tracer + list_tracer=$(ls | grep -v test_dispatch_*) + pytest $list_tracer --junitxml=/tmp/artifacts/test_results/fx/tracer/test_results.xml popd + cd ~/project + - store_test_results: + path: /tmp/artifacts + - store_artifacts: + path: /tmp/testlogs + +# Define a job to be invoked later in a workflow. 
+# See: https://circleci.com/docs/2.0/configuration-reference/#jobs +jobs: + build-aarch64-pyt-jetson: + parameters: + torch-build: + type: string + jetpack-version: + type: string + cxx11-abi: + type: boolean + default: true + python-version: + type: string + default: 3.8.10 + machine: + image: ubuntu-2004:202201-02 + resource_class: arm.xlarge + steps: + - checkout + #- run: + # name: Upgrade base + # command: | + # sudo apt clean + # sudo apt update + # sudo apt upgrade + # sudo apt install software-properties-common + - install-cuda: + os: "ubuntu2004" + platform: "sbsa" + cuda-pkg-name: "cuda-toolkit-11-4" + - run: + name: Install openblas + command: sudo apt install libopenblas-dev + - create-env: + os: "ubuntu2004" + platform: "sbsa" + cudnn-version: << pipeline.parameters.cudnn-jetson-version >> + trt-version-short: << pipeline.parameters.trt-jetson-version-short >> + bazel-version: "5.1.1" + bazel-platform: "arm64" + - run: + name: Set python version + command: | + pyenv install << parameters.python-version >> + pyenv global << parameters.python-version >> + - run: + name: Install NGC Torch + environment: + TORCH_INSTALL: https://developer.download.nvidia.com/compute/redist/jp/v<< parameters.jetpack-version >>/pytorch/<< parameters.torch-build >> + command: | + set -e + python3 -m pip install --upgrade pip; python3 -m pip install setuptools wheel; python3 -m pip install expecttest xmlrunner hypothesis aiohttp numpy=='1.19.4' pyyaml scipy=='1.5.3' ninja cython typing_extensions protobuf; export "LD_LIBRARY_PATH=/usr/lib/llvm-8/lib:$LD_LIBRARY_PATH"; python3 -m pip install --upgrade protobuf; python3 -m pip install --no-cache $TORCH_INSTALL + - when: + condition: << parameters.cxx11-abi >> + steps: + - build-py-cxx11-abi: + platform: "sbsa" + - unless: + condition: << parameters.cxx11-abi >> + steps: + - build-py: + platform: "sbsa" + - run: + name: Move to release dir + command: | + mkdir -p /tmp/dist/jetson + cp -r /tmp/dist/builds/* /tmp/dist/jetson + - persist_to_workspace: + root: /tmp/dist + paths: + - jetson + - store_artifacts: + path: /tmp/dist/jetson + destination: aarch64-pyt-jetson + + build-x86_64-pyt-release: + parameters: + torch-build: + type: string + torch-build-index: + type: string + cxx11-abi: + type: boolean + default: false + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: xlarge + steps: + - checkout + - create-env: + os: "ubuntu2004" + platform: "x86_64" + cudnn-version: << pipeline.parameters.cudnn-release-version >> + trt-version-short: << pipeline.parameters.trt-release-version-short >> + bazel-version: "5.1.1" + bazel-platform: "x86_64" + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - when: + condition: << parameters.cxx11-abi >> + steps: + - build-py-cxx11-abi + - unless: + condition: << parameters.cxx11-abi >> + steps: + - build-py + - run: + name: Move to release dir + command: | + mkdir -p /tmp/dist/release + cp -r /tmp/dist/builds/* /tmp/dist/release + - persist_to_workspace: + root: /tmp/dist + paths: + - release + - store_artifacts: + path: /tmp/dist/release + destination: x86_64-pyt-release + + build-x86_64-pyt-nightly: + parameters: + torch-build: + type: string + torch-build-index: + type: string + cxx11-abi: + type: boolean + default: false + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: xlarge + steps: + - checkout + - create-env: + os: "ubuntu2004" + platform: "x86_64" + cudnn-version: << 
pipeline.parameters.cudnn-nightly-version >> + trt-version-short: << pipeline.parameters.trt-nightly-version-short >> + bazel-version: "5.1.1" + bazel-platform: "x86_64" + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - when: + condition: << parameters.cxx11-abi >> + steps: + - build-py-cxx11-abi + - unless: + condition: << parameters.cxx11-abi >> + steps: + - build-py + - run: + name: Move to nightly dir + command: | + mkdir -p /tmp/dist/nightly + cp -r /tmp/dist/builds/* /tmp/dist/nightly + - persist_to_workspace: + root: /tmp/dist + paths: + - nightly + - store_artifacts: + path: /tmp/dist/nightly + destination: x86_64-pyt-nightly + + test-core-cpp-x86_64-pyt-release: + parameters: + torch-build: + type: string + torch-build-index: + type: string + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: gpu.nvidia.large + parallelism: 4 + steps: + - checkout + - create-env: + os: "ubuntu2004" + platform: "x86_64" + cudnn-version: << pipeline.parameters.cudnn-release-version >> + trt-version-short: << pipeline.parameters.trt-release-version-short >> + bazel-version: "5.1.1" + bazel-platform: "x86_64" + - create-py-env: + trt-version-long: << pipeline.parameters.trt-release-version-long >> + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - attach_workspace: + at: /tmp/dist + - run: + name: "Install torch-tensorrt" + command: pip3 install /tmp/dist/release/* + - dump-test-env + - test-ts-core + + test-ts-py-x86_64-pyt-release: + parameters: + torch-build: + type: string + torch-build-index: + type: string + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: gpu.nvidia.large + steps: + - checkout + - create-py-env: + trt-version-long: << pipeline.parameters.trt-release-version-long >> + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - attach_workspace: + at: /tmp/dist + - run: + name: "Install torch-tensorrt" + command: pip3 install /tmp/dist/release/* + - dump-test-env + - test-ts-py-api + + test-x86_64-pyt-nightly: + parameters: + torch-build: + type: string + torch-build-index: + type: string + machine: + image: ubuntu-2004-cuda-11.4:202110-01 + resource_class: gpu.nvidia.large + steps: + - checkout + - create-py-env: + trt-version-long: << pipeline.parameters.trt-nightly-version-long >> + - attach_workspace: + at: /tmp/dist/ + - run: + name: "Install torch-tensorrt" + command: pip3 install /tmp/dist/nightly/* + # We install torch after torch-trt because pip automatically enforces the version constraint otherwise, swap back after versions are synced + - install-torch-from-index: + torch-build: << parameters.torch-build >> + torch-build-index: << parameters.torch-build-index >> + - dump-test-env + - test-fx + +parameters: + # Nightly platform config + torch-nightly-build: + type: string + default: "1.13.0.dev20220715+cu113" + torch-nightly-build-index: + type: string + default: "https://download.pytorch.org/whl/nightly/cu113" + cudnn-nightly-version: + type: string + default: "8.2.1" + trt-nightly-version-short: + type: string + default: "8.2.4" + trt-nightly-version-long: + type: string + default: "8.2.4.2" + + # Release platform config + torch-release-build: + type: string + default: "1.11.0+cu113" + torch-release-build-index: + type: string + default: "https://download.pytorch.org/whl/cu113" + 
cudnn-release-version: + type: string + default: "8.2.1" + trt-release-version-short: + type: string + default: "8.2.4" + trt-release-version-long: + type: string + default: "8.2.4.2" + + # Jetson platform config + torch-jetson-build: + type: string + default: "torch-1.12.0a0+84d1cb9.nv22.4-cp38-cp38-linux_aarch64.whl" + jetpack-version: + type: string + default: "50" + cudnn-jetson-version: + type: string + default: "8.3.2" + trt-jetson-version-short: + type: string + default: "8.4.1" + trt-jetson-version-long: + type: string + default: "8.4.1.5" + # Invoke jobs via workflows # See: https://circleci.com/docs/2.0/configuration-reference/#workflows workflows: - build_run: + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master + jobs: + - build-aarch64-pyt-jetson: + torch-build: << pipeline.parameters.torch-jetson-build >> + jetpack-version: << pipeline.parameters.jetpack-version >> + python-version: 3.8.10 + + - build-x86_64-pyt-release: + torch-build: << pipeline.parameters.torch-release-build >> + torch-build-index: << pipeline.parameters.torch-release-build-index >> + - test-core-cpp-x86_64-pyt-release: + torch-build: << pipeline.parameters.torch-release-build >> + torch-build-index: << pipeline.parameters.torch-release-build-index >> + requires: + - build-x86_64-pyt-release + + - build-x86_64-pyt-nightly: + torch-build: << pipeline.parameters.torch-nightly-build >> + torch-build-index: << pipeline.parameters.torch-nightly-build-index >> + - test-x86_64-pyt-nightly: + torch-build: << pipeline.parameters.torch-nightly-build >> + torch-build-index: << pipeline.parameters.torch-nightly-build-index >> + requires: + - build-x86_64-pyt-nightly + + on-push: jobs: - - build + - build-aarch64-pyt-jetson: + torch-build: << pipeline.parameters.torch-jetson-build >> + jetpack-version: << pipeline.parameters.jetpack-version >> + python-version: 3.8.10 + + - build-x86_64-pyt-release: + torch-build: << pipeline.parameters.torch-release-build >> + torch-build-index: << pipeline.parameters.torch-release-build-index >> + - test-core-cpp-x86_64-pyt-release: + torch-build: << pipeline.parameters.torch-release-build >> + torch-build-index: << pipeline.parameters.torch-release-build-index >> + requires: + - build-x86_64-pyt-release + - test-ts-py-x86_64-pyt-release: + torch-build: << pipeline.parameters.torch-release-build >> + torch-build-index: << pipeline.parameters.torch-release-build-index >> + requires: + - build-x86_64-pyt-release + + - build-x86_64-pyt-nightly: + torch-build: << pipeline.parameters.torch-nightly-build >> + torch-build-index: << pipeline.parameters.torch-nightly-build-index >> + - test-x86_64-pyt-nightly: + torch-build: << pipeline.parameters.torch-nightly-build >> + torch-build-index: << pipeline.parameters.torch-nightly-build-index >> + requires: + - build-x86_64-pyt-nightly + diff --git a/.gitignore b/.gitignore index 9571f39288..dd940571ad 100644 --- a/.gitignore +++ b/.gitignore @@ -62,3 +62,6 @@ bazel-Torch-TensorRT-Preview docsrc/src/ bazel-TensorRT bazel-tensorrt +.pytest_cache +*.cache +*cifar-10-batches-py* \ No newline at end of file diff --git a/examples/int8/training/vgg16/requirements.txt b/examples/int8/training/vgg16/requirements.txt index ed1268164a..dcb184324b 100644 --- a/examples/int8/training/vgg16/requirements.txt +++ b/examples/int8/training/vgg16/requirements.txt @@ -1,3 +1,6 @@ torch>=1.10.0 tensorboard>=1.14.0 -pytorch-quantization --extra-index-url https://pypi.ngc.nvidia.com +nvidia-pyindex +--extra-index-url 
https://pypi.ngc.nvidia.com +pytorch-quantization>=2.1.2 +tqdm \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index 99084b8194..01a3f9bc08 100644 --- a/noxfile.py +++ b/noxfile.py @@ -5,16 +5,22 @@ # Use system installed Python packages PYT_PATH='/opt/conda/lib/python3.8/site-packages' if not 'PYT_PATH' in os.environ else os.environ["PYT_PATH"] +print(f"Using python path {PYT_PATH}") # Set the root directory to the directory of the noxfile unless the user wants to # TOP_DIR TOP_DIR=os.path.dirname(os.path.realpath(__file__)) if not 'TOP_DIR' in os.environ else os.environ["TOP_DIR"] +print(f"Test root directory {TOP_DIR}") # Set the USE_CXX11=1 to use cxx11_abi USE_CXX11=0 if not 'USE_CXX11' in os.environ else os.environ["USE_CXX11"] +if USE_CXX11: + print("Using cxx11 abi") # Set the USE_HOST_DEPS=1 to use host dependencies for tests USE_HOST_DEPS=0 if not 'USE_HOST_DEPS' in os.environ else os.environ["USE_HOST_DEPS"] +if USE_HOST_DEPS: + print("Using dependencies from host python") SUPPORTED_PYTHON_VERSIONS=["3.7", "3.8", "3.9", "3.10"] @@ -58,6 +64,12 @@ def download_datasets(session): def train_model(session): session.chdir(os.path.join(TOP_DIR, 'examples/int8/training/vgg16')) + session.install("-r", "requirements.txt") + if os.path.exists('vgg16_ckpts/ckpt_epoch25.pth'): + session.run_always('python', + 'export_ckpt.py', + 'vgg16_ckpts/ckpt_epoch25.pth') + return if USE_HOST_DEPS: session.run_always('python', 'main.py', @@ -140,14 +152,14 @@ def run_base_tests(session): print("Running basic tests") session.chdir(os.path.join(TOP_DIR, 'tests/py')) tests = [ - "test_api.py", - "test_to_backend_api.py", + "api", + "integrations/test_to_backend_api.py", ] for test in tests: if USE_HOST_DEPS: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + session.run_always('pytest', test, env={'PYTHONPATH': PYT_PATH}) else: - session.run_always("python", test) + session.run_always("pytest", test) def run_accuracy_tests(session): print("Running accuracy tests") @@ -169,7 +181,7 @@ def copy_model(session): session.run_always('cp', '-rpf', os.path.join(TOP_DIR, src_file), - os.path.join(TOP_DIR, str('tests/py/') + file_name), + os.path.join(TOP_DIR, str('tests/modules/') + file_name), external=True) def run_int8_accuracy_tests(session): @@ -177,15 +189,15 @@ def run_int8_accuracy_tests(session): copy_model(session) session.chdir(os.path.join(TOP_DIR, 'tests/py')) tests = [ - "test_ptq_dataloader_calibrator.py", - "test_ptq_to_backend.py", - "test_qat_trt_accuracy.py", + "ptq/test_ptq_to_backend.py", + "ptq/test_ptq_dataloader_calibrator.py", + "qat/", ] for test in tests: if USE_HOST_DEPS: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + session.run_always('pytest', test, env={'PYTHONPATH': PYT_PATH}) else: - session.run_always("python", test) + session.run_always("pytest", test) def run_trt_compatibility_tests(session): print("Running TensorRT compatibility tests") @@ -197,9 +209,9 @@ def run_trt_compatibility_tests(session): ] for test in tests: if USE_HOST_DEPS: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + session.run_always('pytest', test, env={'PYTHONPATH': PYT_PATH}) else: - session.run_always("python", test) + session.run_always("pytest", test) def run_dla_tests(session): print("Running DLA tests") @@ -209,9 +221,9 @@ def run_dla_tests(session): ] for test in tests: if USE_HOST_DEPS: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + session.run_always('pytest', test, env={'PYTHONPATH': PYT_PATH}) 
else: - session.run_always("python", test) + session.run_always("pytest", test) def run_multi_gpu_tests(session): print("Running multi GPU tests") @@ -221,9 +233,9 @@ def run_multi_gpu_tests(session): ] for test in tests: if USE_HOST_DEPS: - session.run_always('python', test, env={'PYTHONPATH': PYT_PATH}) + session.run_always('pytest', test, env={'PYTHONPATH': PYT_PATH}) else: - session.run_always("python", test) + session.run_always("pytest", test) def run_l0_api_tests(session): if not USE_HOST_DEPS: @@ -245,7 +257,6 @@ def run_l1_accuracy_tests(session): if not USE_HOST_DEPS: install_deps(session) install_torch_trt(session) - download_models(session) download_datasets(session) train_model(session) run_accuracy_tests(session) @@ -255,7 +266,6 @@ def run_l1_int8_accuracy_tests(session): if not USE_HOST_DEPS: install_deps(session) install_torch_trt(session) - download_models(session) download_datasets(session) train_model(session) finetune_model(session) @@ -313,4 +323,8 @@ def l2_multi_gpu_tests(session): @nox.session(python=SUPPORTED_PYTHON_VERSIONS, reuse_venv=True) def download_test_models(session): """Grab all the models needed for testing""" + try: + import torch + except ModuleNotFoundError: + install_deps(session) download_models(session) diff --git a/py/requirements.txt b/py/requirements.txt index 8d12c108aa..fce4b91dca 100644 --- a/py/requirements.txt +++ b/py/requirements.txt @@ -1,5 +1,3 @@ --f https://download.pytorch.org/whl/torch_stable.html --f https://download.pytorch.org/whl/torch/ --extra-index-url https://download.pytorch.org/whl/cu113 -torch==1.11.0+cu113 +torch==1.11.0 pybind11==2.6.2 diff --git a/py/torch_tensorrt/_util.py b/py/torch_tensorrt/_util.py index ba260f8958..94a58cfcc5 100644 --- a/py/torch_tensorrt/_util.py +++ b/py/torch_tensorrt/_util.py @@ -1,6 +1,8 @@ from torch_tensorrt import __version__ from torch_tensorrt import _C +import torch + def dump_build_info(): """Prints build information about the torch_tensorrt distribution to stdout @@ -15,7 +17,9 @@ def get_build_info() -> str: str: String containing the build information for torch_tensorrt distribution """ build_info = _C.get_build_info() - build_info = "Torch-TensorRT Version: " + str(__version__) + '\n' + build_info + build_info = "Torch-TensorRT Version: " + str(__version__) + '\n' \ + + "Using PyTorch Version: " + str(torch.__version__) + '\n' \ + + build_info return build_info diff --git a/tests/modules/hub.py b/tests/modules/hub.py index 57764494e9..48e6b519cb 100644 --- a/tests/modules/hub.py +++ b/tests/modules/hub.py @@ -80,7 +80,7 @@ "model": timm.create_model('vit_base_patch16_224', pretrained=True), "path": "script" }, - "pool": { + "pooling": { "model": cm.Pool(), "path": "trace" }, @@ -104,7 +104,7 @@ "model": cm.FallbackInplaceOPIf(), "path": "script" }, - "bert-base-uncased": { + "bert_base_uncased": { "model": cm.BertModule(), "path": "trace" } diff --git a/tests/modules/requirements.txt b/tests/modules/requirements.txt index b1a922e034..d4b5105850 100644 --- a/tests/modules/requirements.txt +++ b/tests/modules/requirements.txt @@ -1,4 +1,2 @@ --f https://download.pytorch.org/whl/torch_stable.html -#torch==1.11.0+cu113 timm==v0.4.12 transformers==4.17.0 diff --git a/tests/py/api/test_classes.py b/tests/py/api/test_classes.py new file mode 100644 index 0000000000..d21c12a750 --- /dev/null +++ b/tests/py/api/test_classes.py @@ -0,0 +1,190 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict + 
+class TestDevice(unittest.TestCase): + + def test_from_string_constructor(self): + device = torchtrt.Device("cuda:0") + self.assertEqual(device.device_type, torchtrt.DeviceType.GPU) + self.assertEqual(device.gpu_id, 0) + + device = torchtrt.Device("gpu:1") + self.assertEqual(device.device_type, torchtrt.DeviceType.GPU) + self.assertEqual(device.gpu_id, 1) + + def test_from_string_constructor_dla(self): + device = torchtrt.Device("dla:0") + self.assertEqual(device.device_type, torchtrt.DeviceType.DLA) + self.assertEqual(device.gpu_id, 0) + self.assertEqual(device.dla_core, 0) + + device = torchtrt.Device("dla:1", allow_gpu_fallback=True) + self.assertEqual(device.device_type, torchtrt.DeviceType.DLA) + self.assertEqual(device.gpu_id, 0) + self.assertEqual(device.dla_core, 1) + self.assertEqual(device.allow_gpu_fallback, True) + + def test_kwargs_gpu(self): + device = torchtrt.Device(gpu_id=0) + self.assertEqual(device.device_type, torchtrt.DeviceType.GPU) + self.assertEqual(device.gpu_id, 0) + + def test_kwargs_dla_and_settings(self): + device = torchtrt.Device(dla_core=1, allow_gpu_fallback=False) + self.assertEqual(device.device_type, torchtrt.DeviceType.DLA) + self.assertEqual(device.gpu_id, 0) + self.assertEqual(device.dla_core, 1) + self.assertEqual(device.allow_gpu_fallback, False) + + device = torchtrt.Device(gpu_id=1, dla_core=0, allow_gpu_fallback=True) + self.assertEqual(device.device_type, torchtrt.DeviceType.DLA) + self.assertEqual(device.gpu_id, 1) + self.assertEqual(device.dla_core, 0) + self.assertEqual(device.allow_gpu_fallback, True) + + def test_from_torch(self): + device = torchtrt.Device._from_torch_device(torch.device("cuda:0")) + self.assertEqual(device.device_type, torchtrt.DeviceType.GPU) + self.assertEqual(device.gpu_id, 0) + + +class TestInput(unittest.TestCase): + + def _verify_correctness(self, struct: torchtrt.Input, target: Dict) -> bool: + internal = struct._to_internal() + + list_eq = lambda al, bl: all([a == b for (a, b) in zip(al, bl)]) + + eq = lambda a, b: a == b + + def field_is_correct(field, equal_fn, a1, a2): + equal = equal_fn(a1, a2) + if not equal: + print("\nField {} is incorrect: {} != {}".format(field, a1, a2)) + return equal + + min_ = field_is_correct("min", list_eq, internal.min, target["min"]) + opt_ = field_is_correct("opt", list_eq, internal.opt, target["opt"]) + max_ = field_is_correct("max", list_eq, internal.max, target["max"]) + is_dynamic_ = field_is_correct("is_dynamic", eq, internal.input_is_dynamic, target["input_is_dynamic"]) + explicit_set_dtype_ = field_is_correct("explicit_dtype", eq, internal._explicit_set_dtype, + target["explicit_set_dtype"]) + dtype_ = field_is_correct("dtype", eq, int(internal.dtype), int(target["dtype"])) + format_ = field_is_correct("format", eq, int(internal.format), int(target["format"])) + + return all([min_, opt_, max_, is_dynamic_, explicit_set_dtype_, dtype_, format_]) + + def test_infer_from_example_tensor(self): + shape = [1, 3, 255, 255] + target = { + "min": shape, + "opt": shape, + "max": shape, + "input_is_dynamic": False, + "dtype": torchtrt.dtype.half, + "format": torchtrt.TensorFormat.contiguous, + "explicit_set_dtype": True + } + + example_tensor = torch.randn(shape).half() + i = torchtrt.Input._from_tensor(example_tensor) + self.assertTrue(self._verify_correctness(i, target)) + + def test_static_shape(self): + shape = [1, 3, 255, 255] + target = { + "min": shape, + "opt": shape, + "max": shape, + "input_is_dynamic": False, + "dtype": torchtrt.dtype.unknown, + "format": 
torchtrt.TensorFormat.contiguous, + "explicit_set_dtype": False + } + + i = torchtrt.Input(shape) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(tuple(shape)) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(torch.randn(shape).shape) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(shape=shape) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(shape=tuple(shape)) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(shape=torch.randn(shape).shape) + self.assertTrue(self._verify_correctness(i, target)) + + def test_data_type(self): + shape = [1, 3, 255, 255] + target = { + "min": shape, + "opt": shape, + "max": shape, + "input_is_dynamic": False, + "dtype": torchtrt.dtype.half, + "format": torchtrt.TensorFormat.contiguous, + "explicit_set_dtype": True + } + + i = torchtrt.Input(shape, dtype=torchtrt.dtype.half) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(shape, dtype=torch.half) + self.assertTrue(self._verify_correctness(i, target)) + + def test_tensor_format(self): + shape = [1, 3, 255, 255] + target = { + "min": shape, + "opt": shape, + "max": shape, + "input_is_dynamic": False, + "dtype": torchtrt.dtype.unknown, + "format": torchtrt.TensorFormat.channels_last, + "explicit_set_dtype": False + } + + i = torchtrt.Input(shape, format=torchtrt.TensorFormat.channels_last) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(shape, format=torch.channels_last) + self.assertTrue(self._verify_correctness(i, target)) + + def test_dynamic_shape(self): + min_shape = [1, 3, 128, 128] + opt_shape = [1, 3, 256, 256] + max_shape = [1, 3, 512, 512] + target = { + "min": min_shape, + "opt": opt_shape, + "max": max_shape, + "input_is_dynamic": True, + "dtype": torchtrt.dtype.unknown, + "format": torchtrt.TensorFormat.contiguous, + "explicit_set_dtype": False + } + + i = torchtrt.Input(min_shape=min_shape, opt_shape=opt_shape, max_shape=max_shape) + self.assertTrue(self._verify_correctness(i, target)) + + i = torchtrt.Input(min_shape=tuple(min_shape), opt_shape=tuple(opt_shape), max_shape=tuple(max_shape)) + self.assertTrue(self._verify_correctness(i, target)) + + tensor_shape = lambda shape: torch.randn(shape).shape + i = torchtrt.Input(min_shape=tensor_shape(min_shape), + opt_shape=tensor_shape(opt_shape), + max_shape=tensor_shape(max_shape)) + self.assertTrue(self._verify_correctness(i, target)) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/py/api/test_e2e_behavior.py b/tests/py/api/test_e2e_behavior.py new file mode 100644 index 0000000000..0229d28cce --- /dev/null +++ b/tests/py/api/test_e2e_behavior.py @@ -0,0 +1,190 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict + +class TestCompileHalf(unittest.TestCase): + + def test_compile_script_half(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.scripted_model.half() + + compile_spec = { + "inputs": [torchtrt.Input(shape=self.input.shape, dtype=torch.half)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.half} + } + + trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) + same = (trt_mod(self.input.half()) - 
self.scripted_model(self.input.half())).abs().max() + torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same)) + self.assertTrue(same < 3e-2) + + def test_compile_script_half_by_default(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + self.scripted_model.half() + + compile_spec = { + "inputs": [torchtrt.Input(shape=self.input.shape)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float, torch.half} + } + + trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) + same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max() + torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same)) + self.assertTrue(same < 3e-2) + + +class TestFallbackToTorch(unittest.TestCase): + + def test_fallback(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + + compile_spec = { + "inputs": [torchtrt.Input(self.input.shape)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + "allow_gpu_fallback": False, + "disable_tf32": False + }, + "require_full_compilation": False, + "torch_executed_ops": ["aten::max_pool2d"], + "min_block_size": 1 + } + + trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + + def test_module_fallback(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + + compile_spec = { + "inputs": [torchtrt.Input(self.input.shape)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + "allow_gpu_fallback": False, + "disable_tf32": False + }, + "require_full_compilation": False, + "torch_executed_modules": ["torchvision.models.resnet.BasicBlock"], + "min_block_size": 1 + } + + trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +class TestInputTypeDefaultsFP32Model(unittest.TestCase): + + def test_input_use_default_fp32(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + ts_model = torch.jit.script(self.model) + trt_mod = torchtrt.ts.compile(ts_model, + inputs=[torchtrt.Input(self.input.shape)], + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input) + + def test_input_respect_user_setting_fp32_weights_fp16_in(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + ts_model = torch.jit.script(self.model) + trt_mod = torchtrt.ts.compile(ts_model, + inputs=[self.input.half()], + require_full_compilation=True, + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input.half()) + + def test_input_respect_user_setting_fp32_weights_fp16_in_non_constructor(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + ts_model = torch.jit.script(self.model) + input_spec = torchtrt.Input(self.input.shape) + input_spec.dtype = torch.half + + trt_mod = 
torchtrt.ts.compile(ts_model, + inputs=[input_spec], + require_full_compilation=True, + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input.half()) + + +class TestInputTypeDefaultsFP16Model(unittest.TestCase): + + def test_input_use_default_fp16(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + half_mod = torch.jit.script(self.model) + half_mod.half() + + trt_mod = torchtrt.ts.compile(half_mod, + inputs=[torchtrt.Input(self.input.shape)], + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input.half()) + + def test_input_use_default_fp16_without_fp16_enabled(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + half_mod = torch.jit.script(self.model) + half_mod.half() + + trt_mod = torchtrt.ts.compile(half_mod, inputs=[torchtrt.Input(self.input.shape)]) + trt_mod(self.input.half()) + + def test_input_respect_user_setting_fp16_weights_fp32_in(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + half_mod = torch.jit.script(self.model) + half_mod.half() + + trt_mod = torchtrt.ts.compile(half_mod, + inputs=[self.input], + require_full_compilation=True, + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input) + + def test_input_respect_user_setting_fp16_weights_fp32_in_non_constuctor(self): + self.model = models.resnet18(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + + half_mod = torch.jit.script(self.model) + half_mod.half() + + input_spec = torchtrt.Input(self.input.shape) + input_spec.dtype = torch.float + + trt_mod = torchtrt.ts.compile(half_mod, + inputs=[input_spec], + require_full_compilation=True, + enabled_precisions={torch.float, torch.half}) + trt_mod(self.input) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/py/api/test_logging.py b/tests/py/api/test_logging.py new file mode 100644 index 0000000000..81d8478c8d --- /dev/null +++ b/tests/py/api/test_logging.py @@ -0,0 +1,72 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict + +class TestLoggingAPIs(unittest.TestCase): + + def test_logging_prefix(self): + new_prefix = "Python API Test: " + torchtrt.logging.set_logging_prefix(new_prefix) + logging_prefix = torchtrt.logging.get_logging_prefix() + self.assertEqual(new_prefix, logging_prefix) + + def test_reportable_log_level(self): + new_level = torchtrt.logging.Level.Error + torchtrt.logging.set_reportable_log_level(new_level) + level = torchtrt.logging.get_reportable_log_level() + self.assertEqual(new_level, level) + + def test_is_colored_output_on(self): + torchtrt.logging.set_is_colored_output_on(True) + color = torchtrt.logging.get_is_colored_output_on() + self.assertTrue(color) + + def test_context_managers(self): + base_lvl = torchtrt.logging.get_reportable_log_level() + with torchtrt.logging.internal_errors(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.InternalError, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(base_lvl, lvl) + + with torchtrt.logging.errors(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.Error, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + 
self.assertEqual(base_lvl, lvl) + + with torchtrt.logging.warnings(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.Warning, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(base_lvl, lvl) + + with torchtrt.logging.info(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.Info, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(base_lvl, lvl) + + with torchtrt.logging.debug(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.Debug, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(base_lvl, lvl) + + with torchtrt.logging.graphs(): + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(torchtrt.logging.Level.Graph, lvl) + + lvl = torchtrt.logging.get_reportable_log_level() + self.assertEqual(base_lvl, lvl) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/py/api/test_ts_backend.py b/tests/py/api/test_ts_backend.py new file mode 100644 index 0000000000..3ab1604f90 --- /dev/null +++ b/tests/py/api/test_ts_backend.py @@ -0,0 +1,159 @@ +import unittest +import torch_tensorrt as torchtrt +import torch +import torchvision.models as models +import copy +from typing import Dict + +class TestCompile(unittest.TestCase): + + def test_compile_traced(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.traced_model = torch.jit.trace(self.model, [self.input]) + + compile_spec = { + "inputs": [torchtrt.Input(self.input.shape, dtype=torch.float, format=torch.contiguous_format)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float} + } + + trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_compile_script(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + with torch.no_grad(): + trt_mod = torchtrt.ts.compile(self.scripted_model, + inputs=[self.input], + device=torchtrt.Device(gpu_id=0), + enabled_precisions={torch.float}) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_compile_global(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.scripted_model = torch.jit.script(self.model) + trt_mod = torchtrt.compile(self.scripted_model, + inputs=[self.input], + device=torchtrt.Device(gpu_id=0), + enabled_precisions={torch.float}) + same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_compile_global_nn_mod(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + with torch.no_grad(): + trt_mod = torchtrt.compile(self.model, + inputs=[self.input], + device=torchtrt.Device(gpu_id=0), + enabled_precisions={torch.float}) + same = (trt_mod(self.input) - self.model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_from_torch_tensor(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 
224)).to("cuda") + self.traced_model = torch.jit.trace(self.model, [self.input]) + compile_spec = { + "inputs": [self.input], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float} + } + + trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_device(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.traced_model = torch.jit.trace(self.model, [self.input]) + compile_spec = {"inputs": [self.input], "device": torchtrt.Device("gpu:0"), "enabled_precisions": {torch.float}} + + trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_default_device(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.traced_model = torch.jit.trace(self.model, [self.input]) + compile_spec = {"inputs": [self.input], "enabled_precisions": {torch.float}} + + trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + + def test_compile_script_from_dict(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.traced_model = torch.jit.trace(self.model, [self.input]) + compile_spec = { + "inputs": [torchtrt.Input(shape=self.input.shape)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + }, + "enabled_precisions": {torch.float} + } + + trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec) + same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max() + self.assertTrue(same < 2e-2) + +class TestPTtoTRTtoPT(unittest.TestCase): + + def test_pt_to_trt_to_pt(self): + self.model = models.vgg16(pretrained=True).eval().to("cuda") + self.input = torch.randn((1, 3, 224, 224)).to("cuda") + self.ts_model = torch.jit.trace(self.model, [self.input]) + + compile_spec = { + "inputs": [torchtrt.Input(self.input.shape)], + "device": { + "device_type": torchtrt.DeviceType.GPU, + "gpu_id": 0, + "allow_gpu_fallback": False, + "disable_tf32": False + } + } + + trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.ts_model, "forward", **compile_spec) + trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine, torchtrt.Device("cuda:0")) + same = (trt_mod(self.input) - self.ts_model(self.input)).abs().max() + self.assertTrue(same < 2e-3) + +class TestCheckMethodOpSupport(unittest.TestCase): + + def test_check_support(self): + module = models.alexnet(pretrained=True).eval().to("cuda") + self.module = torch.jit.trace(module, torch.ones((1, 3, 224, 224)).to("cuda")) + + self.assertTrue(torchtrt.ts.check_method_op_support(self.module, "forward")) + + +class TestModuleIdentification(unittest.TestCase): + + def test_module_type(self): + nn_module = models.alexnet(pretrained=True).eval().to("cuda") + ts_module = torch.jit.trace(nn_module, torch.ones([1, 3, 224, 224]).to("cuda")) + fx_module = torch.fx.symbolic_trace(nn_module) + + self.assertEqual(torchtrt._compile._parse_module_type(nn_module), torchtrt._compile._ModuleType.nn) + self.assertEqual(torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts) + 
self.assertEqual(torchtrt._compile._parse_module_type(fx_module), torchtrt._compile._ModuleType.fx) + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/py/test_api_dla.py b/tests/py/hw/test_api_dla.py similarity index 100% rename from tests/py/test_api_dla.py rename to tests/py/hw/test_api_dla.py diff --git a/tests/py/test_multi_gpu.py b/tests/py/hw/test_multi_gpu.py similarity index 100% rename from tests/py/test_multi_gpu.py rename to tests/py/hw/test_multi_gpu.py diff --git a/tests/py/test_to_backend_api.py b/tests/py/integrations/test_to_backend_api.py similarity index 77% rename from tests/py/test_to_backend_api.py rename to tests/py/integrations/test_to_backend_api.py index 11c411ff56..1607e029f2 100644 --- a/tests/py/test_to_backend_api.py +++ b/tests/py/integrations/test_to_backend_api.py @@ -3,13 +3,12 @@ import torch import torchvision.models as models -from model_test_case import ModelTestCase - -class TestToBackendLowering(ModelTestCase): +class TestToBackendLowering(unittest.TestCase): def setUp(self): self.input = torch.randn((1, 3, 300, 300)).to("cuda") + self.model = models.resnet18(pretrained=True).eval().to("cuda") self.scripted_model = torch.jit.script(self.model) self.spec = { "forward": @@ -37,17 +36,5 @@ def test_to_backend_lowering(self): same = (trt_mod.forward(self.input) - self.scripted_model(self.input)).abs().max() self.assertTrue(same < 2e-3) - -def test_suite(): - suite = unittest.TestSuite() - suite.addTest(TestToBackendLowering.parametrize(TestToBackendLowering, model=models.resnet18(pretrained=True))) - - return suite - - -suite = test_suite() - -runner = unittest.TextTestRunner() -result = runner.run(suite) - -exit(int(not result.wasSuccessful())) +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/tests/py/test_trt_intercompatibility.py b/tests/py/integrations/test_trt_intercompatibility.py similarity index 77% rename from tests/py/test_trt_intercompatibility.py rename to tests/py/integrations/test_trt_intercompatibility.py index e1d614a200..742040022a 100644 --- a/tests/py/test_trt_intercompatibility.py +++ b/tests/py/integrations/test_trt_intercompatibility.py @@ -4,16 +4,13 @@ import torchvision.models as models import tensorrt as trt -from model_test_case import ModelTestCase +class TestPyTorchToTRTEngine(unittest.TestCase): -class TestPyTorchToTRTEngine(ModelTestCase): - - def setUp(self): + def test_pt_to_trt(self): + self.model=models.resnet18(pretrained=True).eval().to("cuda:0") self.input = torch.randn((1, 3, 224, 224)).to("cuda:0") self.ts_model = torch.jit.script(self.model) - - def test_pt_to_trt(self): compile_spec = { "inputs": [torchtrt.Input(self.input.shape)], "truncate_long_and_double": True, @@ -40,17 +37,5 @@ def test_pt_to_trt(self): same = (out - self.ts_model(self.input)).abs().max() self.assertTrue(same < 2e-3) - -def test_suite(): - suite = unittest.TestSuite() - suite.addTest(TestPyTorchToTRTEngine.parametrize(TestPyTorchToTRTEngine, model=models.resnet18(pretrained=True))) - - return suite - - -suite = test_suite() - -runner = unittest.TextTestRunner() -result = runner.run(suite) - -exit(int(not result.wasSuccessful())) +if __name__ == "__main__": + unittest.main() diff --git a/tests/py/test_ptq_dataloader_calibrator.py b/tests/py/ptq/test_ptq_dataloader_calibrator.py similarity index 58% rename from tests/py/test_ptq_dataloader_calibrator.py rename to tests/py/ptq/test_ptq_dataloader_calibrator.py index 158a5425e8..66c4b7ff15 100644 --- 
a/tests/py/test_ptq_dataloader_calibrator.py +++ b/tests/py/ptq/test_ptq_dataloader_calibrator.py @@ -6,12 +6,51 @@ from torch.nn import functional as F import torchvision import torchvision.transforms as transforms -from model_test_case import ModelTestCase +import os + +def find_repo_root(max_depth=10): + dir_path = os.path.dirname(os.path.realpath(__file__)) + for i in range(max_depth): + files = os.listdir(dir_path) + if "WORKSPACE" in files: + return dir_path + else: + dir_path = os.path.dirname(dir_path) + + raise RuntimeError("Could not find repo root") + +MODULE_DIR = find_repo_root() + "/tests/modules" + +def compute_accuracy(testing_dataloader, model): + total = 0 + correct = 0 + loss = 0.0 + class_probs = [] + class_preds = [] + device = torch.device('cuda:0') + with torch.no_grad(): + idx = 0 + for data, labels in testing_dataloader: + data, labels = data.to(device), labels.to(device) + out = model(data) + preds = torch.max(out, 1)[1] + class_probs.append([F.softmax(i, dim=0) for i in out]) + class_preds.append(preds) + total += labels.size(0) + correct += (preds == labels).sum().item() + idx += 1 + + test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) + test_preds = torch.cat(class_preds) + return correct / total + + +class TestAccuracy(unittest.TestCase): -class TestAccuracy(ModelTestCase): + def test_compile_script(self): - def setUp(self): + self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") self.input = torch.randn((1, 3, 32, 32)).to("cuda") self.testing_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, @@ -33,32 +72,7 @@ def setUp(self): algo_type=torchtrt.ptq.CalibrationAlgo.ENTROPY_CALIBRATION_2, device=torch.device('cuda:0')) - def compute_accuracy(self, testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device('cuda:0') - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - def test_compile_script(self): - - fp32_test_acc = self.compute_accuracy(self.testing_dataloader, self.model) + fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) compile_spec = { @@ -75,24 +89,11 @@ def test_compile_script(self): } trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod) + int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) acc_diff = fp32_test_acc - int8_test_acc self.assertTrue(abs(acc_diff) < 3) -def test_suite(): - suite = unittest.TestSuite() - # You need a pre-trained VGG cifar10 model to run this test. Please follow instructions at - # https://github.com/NVIDIA/torchtrt/tree/master/cpp/ptq/training/vgg16 to export this model. 
diff --git a/tests/py/test_ptq_to_backend.py b/tests/py/ptq/test_ptq_to_backend.py
similarity index 62%
rename from tests/py/test_ptq_to_backend.py
rename to tests/py/ptq/test_ptq_to_backend.py
index 297e7d672a..627208960d 100644
--- a/tests/py/test_ptq_to_backend.py
+++ b/tests/py/ptq/test_ptq_to_backend.py
@@ -6,12 +6,49 @@
 from torch.nn import functional as F
 import torchvision
 import torchvision.transforms as transforms
-from model_test_case import ModelTestCase
+import os
+
+def find_repo_root(max_depth=10):
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    for i in range(max_depth):
+        files = os.listdir(dir_path)
+        if "WORKSPACE" in files:
+            return dir_path
+        else:
+            dir_path = os.path.dirname(dir_path)
+
+    raise RuntimeError("Could not find repo root")
+
+MODULE_DIR = find_repo_root() + "/tests/modules"
+
+def compute_accuracy(testing_dataloader, model):
+    total = 0
+    correct = 0
+    loss = 0.0
+    class_probs = []
+    class_preds = []
+    device = torch.device('cuda:0')
+    with torch.no_grad():
+        idx = 0
+        for data, labels in testing_dataloader:
+            data, labels = data.to(device), labels.to(device)
+            out = model(data)
+            preds = torch.max(out, 1)[1]
+            class_probs.append([F.softmax(i, dim=0) for i in out])
+            class_preds.append(preds)
+            total += labels.size(0)
+            correct += (preds == labels).sum().item()
+            idx += 1
+
+    test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
+    test_preds = torch.cat(class_preds)
+    return correct / total
+
+
+class TestAccuracy(unittest.TestCase):
 
-
-class TestAccuracy(ModelTestCase):
-
-    def setUp(self):
+    def test_compile_script(self):
+        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
         self.input = torch.randn((1, 3, 32, 32)).to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(root='./data',
                                                             train=False,
@@ -50,53 +87,16 @@ def setUp(self):
             })
         }
 
-    def compute_accuracy(self, testing_dataloader, model):
-        total = 0
-        correct = 0
-        loss = 0.0
-        class_probs = []
-        class_preds = []
-
-        with torch.no_grad():
-            idx = 0
-            for data, labels in testing_dataloader:
-                data, labels = data.cuda(), labels.cuda(non_blocking=True)
-                out = model(data)
-                preds = torch.max(out, 1)[1]
-                class_probs.append([F.softmax(i, dim=0) for i in out])
-                class_preds.append(preds)
-                total += labels.size(0)
-                correct += (preds == labels).sum().item()
-                idx += 1
-
-        test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
-        test_preds = torch.cat(class_preds)
-        return correct / total
-
-    def test_compile_script(self):
-        fp32_test_acc = self.compute_accuracy(self.testing_dataloader, self.model)
+        fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model)
         log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc))
 
         trt_mod = torch._C._jit_to_backend("tensorrt", self.model, self.spec)
-        int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod)
+        int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod)
         log(Level.Info, "[TRT INT8 Backend] Test Acc: {:.2f}%".format(100 * int8_test_acc))
         acc_diff = fp32_test_acc - int8_test_acc
         self.assertTrue(abs(acc_diff) < 3)
 
-def test_suite():
-    suite = unittest.TestSuite()
-    # You need a pre-trained VGG cifar10 model to run this test. Please follow instructions at
-    # https://github.com/NVIDIA/torchtrt/tree/master/cpp/ptq/training/vgg16 to export this model.
-    suite.addTest(TestAccuracy.parametrize(TestAccuracy, model=torch.jit.load('./trained_vgg16.jit.pt')))
-
-    return suite
-
-
-suite = test_suite()
-
-runner = unittest.TextTestRunner()
-result = runner.run(suite)
-
-exit(int(not result.wasSuccessful()))
+if __name__ == "__main__":
+    unittest.main()
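The to_backend path above hands a `{method_name: TensorRTCompileSpec}` dict to `torch._C._jit_to_backend`. A hedged sketch of that call shape (the spec fields are illustrative; the test's full spec is elided by the hunk context):

```python
import torch
import torch_tensorrt as torchtrt
import torchvision.models as models

scripted = torch.jit.script(models.resnet18(pretrained=True).eval().to("cuda"))

# The backend consumes one TensorRTCompileSpec per TorchScript method.
spec = {
    "forward":
        torchtrt.ts.TensorRTCompileSpec({
            "inputs": [torchtrt.Input([1, 3, 224, 224])],
            "enabled_precisions": {torch.float},
            "device": {
                "device_type": torchtrt.DeviceType.GPU,
                "gpu_id": 0,
                "dla_core": 0,
                "allow_gpu_fallback": True,
            },
        })
}

trt_backend_mod = torch._C._jit_to_backend("tensorrt", scripted, spec)
out = trt_backend_mod.forward(torch.randn(1, 3, 224, 224).to("cuda"))
```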
diff --git a/tests/py/test_ptq_trt_calibrator.py b/tests/py/ptq/test_ptq_trt_calibrator.py
similarity index 69%
rename from tests/py/test_ptq_trt_calibrator.py
rename to tests/py/ptq/test_ptq_trt_calibrator.py
index 7d9d3fa000..33431e4055 100644
--- a/tests/py/test_ptq_trt_calibrator.py
+++ b/tests/py/ptq/test_ptq_trt_calibrator.py
@@ -8,7 +8,42 @@
 from torch.nn import functional as F
 import torchvision
 import torchvision.transforms as transforms
-from model_test_case import ModelTestCase
+
+def find_repo_root(max_depth=10):
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    for i in range(max_depth):
+        files = os.listdir(dir_path)
+        if "WORKSPACE" in files:
+            return dir_path
+        else:
+            dir_path = os.path.dirname(dir_path)
+
+    raise RuntimeError("Could not find repo root")
+
+MODULE_DIR = find_repo_root() + "/tests/modules"
+
+def compute_accuracy(testing_dataloader, model):
+    total = 0
+    correct = 0
+    loss = 0.0
+    class_probs = []
+    class_preds = []
+    device = torch.device('cuda:0')
+    with torch.no_grad():
+        idx = 0
+        for data, labels in testing_dataloader:
+            data, labels = data.to(device), labels.to(device)
+            out = model(data)
+            preds = torch.max(out, 1)[1]
+            class_probs.append([F.softmax(i, dim=0) for i in out])
+            class_preds.append(preds)
+            total += labels.size(0)
+            correct += (preds == labels).sum().item()
+            idx += 1
+
+    test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
+    test_preds = torch.cat(class_preds)
+    return correct / total
 
 
 class TRTEntropyCalibrator(trt.IInt8EntropyCalibrator2):
@@ -54,9 +89,10 @@ def write_calibration_cache(self, cache):
             f.write(cache)
 
 
-class TestAccuracy(ModelTestCase):
+class TestAccuracy(unittest.TestCase):
 
-    def setUp(self):
+    def test_compile_script(self):
+        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda")
         self.input = torch.randn((1, 3, 32, 32)).to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(root='./data',
                                                             train=False,
@@ -74,32 +110,7 @@ def setUp(self):
         # Test cases can assume using GPU id: 0
         self.calibrator = TRTEntropyCalibrator(self.testing_dataloader)
 
-    def compute_accuracy(self, testing_dataloader, model):
-        total = 0
-        correct = 0
-        loss = 0.0
-        class_probs = []
-        class_preds = []
-        device = torch.device('cuda:0')
-        with torch.no_grad():
-            idx = 0
-            for data, labels in testing_dataloader:
-                data, labels = data.to(device), labels.to(device)
-                out = model(data)
-                preds = torch.max(out, 1)[1]
-                class_probs.append([F.softmax(i, dim=0) for i in out])
-                class_preds.append(preds)
-                total += labels.size(0)
-                correct += (preds == labels).sum().item()
-                idx += 1
-
-        test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
-        test_preds = torch.cat(class_preds)
-        return correct / total
-
-    def test_compile_script(self):
-
-        fp32_test_acc = self.compute_accuracy(self.testing_dataloader, self.model)
+        fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model)
         log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc))
 
         compile_spec = {
@@ -116,24 +127,12 @@ def test_compile_script(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-        int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod)
+        int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod)
         log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc))
         acc_diff = fp32_test_acc - int8_test_acc
         self.assertTrue(abs(acc_diff) < 3)
 
-def test_suite():
-    suite = unittest.TestSuite()
-    # You need a pre-trained VGG cifar10 model to run this test. Please follow instructions at
-    # https://github.com/NVIDIA/torchtrt/tree/master/cpp/ptq/training/vgg16 to export this model.
-    suite.addTest(TestAccuracy.parametrize(TestAccuracy, model=torch.jit.load('./trained_vgg16.jit.pt')))
-
-    return suite
-
-
-suite = test_suite()
-
-runner = unittest.TextTestRunner()
-result = runner.run(suite)
-exit(int(not result.wasSuccessful()))
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
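TRTEntropyCalibrator above implements TensorRT's `IInt8EntropyCalibrator2` interface by hand rather than using torch_tensorrt's helper. A trimmed sketch of that interface (the buffer handling is illustrative, not the test's exact code):

```python
import tensorrt as trt
import torch

class SketchEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """Illustrative IInt8EntropyCalibrator2 implementation."""

    def __init__(self, dataloader, cache_file="./calibration.cache"):
        trt.IInt8EntropyCalibrator2.__init__(self)
        self.cache_file = cache_file
        self.batches = iter(dataloader)

    def get_batch_size(self):
        return 1

    def get_batch(self, names):
        try:
            data, _ = next(self.batches)
            # Keep a reference so the device buffer outlives this call.
            self.current = data.to("cuda:0")
            return [self.current.data_ptr()]
        except StopIteration:
            return None  # tells TensorRT that calibration data is exhausted

    def read_calibration_cache(self):
        try:
            with open(self.cache_file, "rb") as f:
                return f.read()
        except FileNotFoundError:
            return None

    def write_calibration_cache(self, cache):
        with open(self.cache_file, "wb") as f:
            f.write(cache)
```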
diff --git a/tests/py/test_qat_trt_accuracy.py b/tests/py/qat/test_qat_trt_accuracy.py
similarity index 52%
rename from tests/py/test_qat_trt_accuracy.py
rename to tests/py/qat/test_qat_trt_accuracy.py
index 74fb70b3df..3086896f8c 100644
--- a/tests/py/test_qat_trt_accuracy.py
+++ b/tests/py/qat/test_qat_trt_accuracy.py
@@ -6,14 +6,52 @@
 from torch.nn import functional as F
 import torchvision
 import torchvision.transforms as transforms
-from model_test_case import ModelTestCase
+import os
+import sys
+
+def find_repo_root(max_depth=10):
+    dir_path = os.path.dirname(os.path.realpath(__file__))
+    for i in range(max_depth):
+        files = os.listdir(dir_path)
+        if "WORKSPACE" in files:
+            return dir_path
+        else:
+            dir_path = os.path.dirname(dir_path)
+
+    raise RuntimeError("Could not find repo root")
+
+MODULE_DIR = find_repo_root() + "/tests/modules"
 
 set_reportable_log_level(Level.Graph)
 
-class TestAccuracy(ModelTestCase):
+def compute_accuracy(testing_dataloader, model):
+    total = 0
+    correct = 0
+    loss = 0.0
+    class_probs = []
+    class_preds = []
+    device = torch.device('cuda:0')
+    with torch.no_grad():
+        idx = 0
+        for data, labels in testing_dataloader:
+            data, labels = data.to(device), labels.to(device)
+            out = model(data)
+            preds = torch.max(out, 1)[1]
+            class_probs.append([F.softmax(i, dim=0) for i in out])
+            class_preds.append(preds)
+            total += labels.size(0)
+            correct += (preds == labels).sum().item()
+            idx += 1
+
+    test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
+    test_preds = torch.cat(class_preds)
+    return correct / total
+
+class TestAccuracy(unittest.TestCase):
 
-    def setUp(self):
+    def test_compile_script(self):
+        self.model = torch.jit.load(MODULE_DIR + "/trained_vgg16_qat.jit.pt").eval().to("cuda")
         self.testing_dataset = torchvision.datasets.CIFAR10(root='./data',
                                                             train=False,
                                                             download=True,
@@ -28,31 +66,7 @@ def setUp(self):
                                                           shuffle=False,
                                                           num_workers=1)
 
-    def compute_accuracy(self, testing_dataloader, model):
-        total = 0
-        correct = 0
-        loss = 0.0
-        class_probs = []
-        class_preds = []
-        device = torch.device('cuda:0')
-        with torch.no_grad():
-            idx = 0
-            for data, labels in testing_dataloader:
-                data, labels = data.to(device), labels.to(device)
-                out = model(data)
-                preds = torch.max(out, 1)[1]
-                class_probs.append([F.softmax(i, dim=0) for i in out])
-                class_preds.append(preds)
-                total += labels.size(0)
-                correct += (preds == labels).sum().item()
-                idx += 1
-
-        test_probs = torch.cat([torch.stack(batch) for batch in class_probs])
-        test_preds = torch.cat(class_preds)
-        return correct / total
-
-    def test_compile_script(self):
-        fp32_test_acc = self.compute_accuracy(self.testing_dataloader, self.model)
+        fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model)
         log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc))
 
         compile_spec = {
@@ -62,24 +76,10 @@ def test_compile_script(self):
         }
 
         trt_mod = torchtrt.ts.compile(self.model, **compile_spec)
-        int8_test_acc = self.compute_accuracy(self.testing_dataloader, trt_mod)
+        int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod)
         log(Level.Info, "[TRT QAT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc))
         acc_diff = fp32_test_acc - int8_test_acc
         self.assertTrue(abs(acc_diff) < 3)
 
-
-def test_suite():
-    suite = unittest.TestSuite()
-    # You need a VGG QAT model trained on CIFAR10 to run this test. Please follow instructions at
-    # https://github.com/NVIDIA/torchtrt/tree/master/examples/int8/training/vgg16 to export this model.
-    suite.addTest(TestAccuracy.parametrize(TestAccuracy, model=torch.jit.load('./trained_vgg16_qat.jit.pt')))
-
-    return suite
-
-
-suite = test_suite()
-
-runner = unittest.TextTestRunner()
-result = runner.run(suite)
-
-exit(int(not result.wasSuccessful()))
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
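Unlike the PTQ tests, the QAT test needs no calibrator: a QAT-trained module already carries its quantization scales in Q/DQ nodes. A sketch of that compile path, assuming the trained_vgg16_qat.jit.pt module from tests/modules (spec fields are illustrative, since the test's full spec is elided by the hunk):

```python
import torch
import torch_tensorrt as torchtrt

# QAT module exported per examples/int8/training/vgg16; no calibrator required.
qat_model = torch.jit.load("tests/modules/trained_vgg16_qat.jit.pt").eval().to("cuda")

compile_spec = {
    "inputs": [torchtrt.Input([16, 3, 32, 32])],
    "enabled_precisions": {torch.int8},
}
trt_mod = torchtrt.ts.compile(qat_model, **compile_spec)
```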
diff --git a/tests/py/requirements.txt b/tests/py/requirements.txt
index 91e97eed3e..784cca7d8d 100644
--- a/tests/py/requirements.txt
+++ b/tests/py/requirements.txt
@@ -1,2 +1,3 @@
-torchvision==0.12.0+cu113
--f https://download.pytorch.org/whl/torch_stable.html
+torchvision==0.12.0
+--extra-index-url https://download.pytorch.org/whl/cu113
+pytest
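With pytest added to the test requirements, the relocated suites can be driven either by unittest through the new `__main__` blocks or programmatically. A sketch (paths assume the new tests/py layout; a GPU and, for the ptq/qat suites, the trained VGG16 modules under tests/modules are still required):

```python
import pytest

# Run the relocated suites; equivalent to invoking pytest on these directories.
exit_code = pytest.main(["tests/py/integrations", "tests/py/ptq", "-q"])
```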
diff --git a/tests/py/test_api.py b/tests/py/test_api.py
deleted file mode 100644
index 987e49e1f6..0000000000
--- a/tests/py/test_api.py
+++ /dev/null
@@ -1,596 +0,0 @@
-import unittest
-import torch_tensorrt as torchtrt
-import torch
-import torchvision.models as models
-import copy
-from typing import Dict
-
-from model_test_case import ModelTestCase
-
-
-class TestCompile(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.traced_model = torch.jit.trace(self.model, [self.input])
-        self.scripted_model = torch.jit.script(self.model)
-
-    def test_compile_traced(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape, dtype=torch.float, format=torch.contiguous_format)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.float}
-        }
-
-        trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_compile_script(self):
-        with torch.no_grad():
-            trt_mod = torchtrt.ts.compile(self.scripted_model,
-                                          inputs=[self.input],
-                                          device=torchtrt.Device(gpu_id=0),
-                                          enabled_precisions={torch.float})
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_compile_global(self):
-        trt_mod = torchtrt.compile(self.scripted_model,
-                                   inputs=[self.input],
-                                   device=torchtrt.Device(gpu_id=0),
-                                   enabled_precisions={torch.float})
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_compile_global_nn_mod(self):
-        with torch.no_grad():
-            trt_mod = torchtrt.compile(self.model,
-                                       inputs=[self.input],
-                                       device=torchtrt.Device(gpu_id=0),
-                                       enabled_precisions={torch.float})
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_from_torch_tensor(self):
-        compile_spec = {
-            "inputs": [self.input],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.float}
-        }
-
-        trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_device(self):
-        compile_spec = {"inputs": [self.input], "device": torchtrt.Device("gpu:0"), "enabled_precisions": {torch.float}}
-
-        trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_default_device(self):
-        compile_spec = {"inputs": [self.input], "enabled_precisions": {torch.float}}
-
-        trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-    def test_compile_script_from_dict(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(shape=self.input.shape)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.float}
-        }
-
-        trt_mod = torchtrt.ts.compile(self.traced_model, **compile_spec)
-        same = (trt_mod(self.input) - self.traced_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-2)
-
-
-class TestCompileHalf(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.scripted_model = torch.jit.script(self.model)
-        self.scripted_model.half()
-
-    def test_compile_script_half(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(shape=self.input.shape, dtype=torch.half)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.half}
-        }
-
-        trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max()
-        torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same))
-        self.assertTrue(same < 3e-2)
-
-
-class TestCompileHalfDefault(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.scripted_model = torch.jit.script(self.model)
-        self.scripted_model.half()
-
-    def test_compile_script_half_by_default(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(shape=self.input.shape)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-            },
-            "enabled_precisions": {torch.float, torch.half}
-        }
-
-        trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input.half()) - self.scripted_model(self.input.half())).abs().max()
-        torchtrt.logging.log(torchtrt.logging.Level.Debug, "Max diff: " + str(same))
-        self.assertTrue(same < 3e-2)
-
-
-class TestFallbackToTorch(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.scripted_model = torch.jit.script(self.model)
-
-    def test_compile_script(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-                "allow_gpu_fallback": False,
-                "disable_tf32": False
-            },
-            "require_full_compilation": False,
-            "torch_executed_ops": ["aten::max_pool2d"],
-            "min_block_size": 1
-        }
-
-        trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-3)
-
-
-class TestModuleFallbackToTorch(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.scripted_model = torch.jit.script(self.model)
-
-    def test_compile_script(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-                "allow_gpu_fallback": False,
-                "disable_tf32": False
-            },
-            "require_full_compilation": False,
-            "torch_executed_modules": ["torchvision.models.resnet.BasicBlock"],
-            "min_block_size": 1
-        }
-
-        trt_mod = torchtrt.ts.compile(self.scripted_model, **compile_spec)
-        same = (trt_mod(self.input) - self.scripted_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-3)
-
-
-class TestPTtoTRTtoPT(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-        self.ts_model = torch.jit.trace(self.model, [self.input])
-
-    def test_pt_to_trt_to_pt(self):
-        compile_spec = {
-            "inputs": [torchtrt.Input(self.input.shape)],
-            "device": {
-                "device_type": torchtrt.DeviceType.GPU,
-                "gpu_id": 0,
-                "allow_gpu_fallback": False,
-                "disable_tf32": False
-            }
-        }
-
-        trt_engine = torchtrt.ts.convert_method_to_trt_engine(self.ts_model, "forward", **compile_spec)
-        trt_mod = torchtrt.ts.embed_engine_in_new_module(trt_engine, torchtrt.Device("cuda:0"))
-        same = (trt_mod(self.input) - self.ts_model(self.input)).abs().max()
-        self.assertTrue(same < 2e-3)
-
-
-class TestInputTypeDefaultsFP32Model(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-    def test_input_use_default_fp32(self):
-        ts_model = torch.jit.script(self.model)
-        trt_mod = torchtrt.ts.compile(ts_model,
-                                      inputs=[torchtrt.Input(self.input.shape)],
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input)
-
-    def test_input_respect_user_setting_fp32_weights_fp16_in(self):
-        ts_model = torch.jit.script(self.model)
-        trt_mod = torchtrt.ts.compile(ts_model,
-                                      inputs=[self.input.half()],
-                                      require_full_compilation=True,
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input.half())
-
-    def test_input_respect_user_setting_fp32_weights_fp16_in_non_constructor(self):
-        ts_model = torch.jit.script(self.model)
-        input_spec = torchtrt.Input(self.input.shape)
-        input_spec.dtype = torch.half
-
-        trt_mod = torchtrt.ts.compile(ts_model,
-                                      inputs=[input_spec],
-                                      require_full_compilation=True,
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input.half())
-
-
-class TestInputTypeDefaultsFP16Model(ModelTestCase):
-
-    def setUp(self):
-        self.input = torch.randn((1, 3, 224, 224)).to("cuda")
-
-    def test_input_use_default_fp16(self):
-        half_mod = torch.jit.script(self.model)
-        half_mod.half()
-
-        trt_mod = torchtrt.ts.compile(half_mod,
-                                      inputs=[torchtrt.Input(self.input.shape)],
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input.half())
-
-    def test_input_use_default_fp16_without_fp16_enabled(self):
-        half_mod = torch.jit.script(self.model)
-        half_mod.half()
-
-        trt_mod = torchtrt.ts.compile(half_mod, inputs=[torchtrt.Input(self.input.shape)])
-        trt_mod(self.input.half())
-
-    def test_input_respect_user_setting_fp16_weights_fp32_in(self):
-        half_mod = torch.jit.script(self.model)
-        half_mod.half()
-
-        trt_mod = torchtrt.ts.compile(half_mod,
-                                      inputs=[self.input],
-                                      require_full_compilation=True,
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input)
-
-    def test_input_respect_user_setting_fp16_weights_fp32_in_non_constuctor(self):
-        half_mod = torch.jit.script(self.model)
-        half_mod.half()
-
-        input_spec = torchtrt.Input(self.input.shape)
-        input_spec.dtype = torch.float
-
-        trt_mod = torchtrt.ts.compile(half_mod,
-                                      inputs=[input_spec],
-                                      require_full_compilation=True,
-                                      enabled_precisions={torch.float, torch.half})
-        trt_mod(self.input)
-
-
-class TestCheckMethodOpSupport(unittest.TestCase):
-
-    def setUp(self):
-        module = models.alexnet(pretrained=True).eval().to("cuda")
-        self.module = torch.jit.trace(module, torch.ones((1, 3, 224, 224)).to("cuda"))
-
-    def test_check_support(self):
-        self.assertTrue(torchtrt.ts.check_method_op_support(self.module, "forward"))
-
-
-class TestLoggingAPIs(unittest.TestCase):
-
-    def test_logging_prefix(self):
-        new_prefix = "Python API Test: "
-        torchtrt.logging.set_logging_prefix(new_prefix)
-        logging_prefix = torchtrt.logging.get_logging_prefix()
-        self.assertEqual(new_prefix, logging_prefix)
-
-    def test_reportable_log_level(self):
-        new_level = torchtrt.logging.Level.Error
-        torchtrt.logging.set_reportable_log_level(new_level)
-        level = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(new_level, level)
-
-    def test_is_colored_output_on(self):
-        torchtrt.logging.set_is_colored_output_on(True)
-        color = torchtrt.logging.get_is_colored_output_on()
-        self.assertTrue(color)
-
-    def test_context_managers(self):
-        base_lvl = torchtrt.logging.get_reportable_log_level()
-        with torchtrt.logging.internal_errors():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.InternalError, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-        with torchtrt.logging.errors():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.Error, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-        with torchtrt.logging.warnings():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.Warning, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-        with torchtrt.logging.info():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.Info, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-        with torchtrt.logging.debug():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.Debug, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-        with torchtrt.logging.graphs():
-            lvl = torchtrt.logging.get_reportable_log_level()
-            self.assertEqual(torchtrt.logging.Level.Graph, lvl)
-
-        lvl = torchtrt.logging.get_reportable_log_level()
-        self.assertEqual(base_lvl, lvl)
-
-
-class TestDevice(unittest.TestCase):
-
-    def test_from_string_constructor(self):
-        device = torchtrt.Device("cuda:0")
-        self.assertEqual(device.device_type, torchtrt.DeviceType.GPU)
-        self.assertEqual(device.gpu_id, 0)
-
-        device = torchtrt.Device("gpu:1")
-        self.assertEqual(device.device_type, torchtrt.DeviceType.GPU)
-        self.assertEqual(device.gpu_id, 1)
-
-    def test_from_string_constructor_dla(self):
-        device = torchtrt.Device("dla:0")
-        self.assertEqual(device.device_type, torchtrt.DeviceType.DLA)
-        self.assertEqual(device.gpu_id, 0)
-        self.assertEqual(device.dla_core, 0)
-
-        device = torchtrt.Device("dla:1", allow_gpu_fallback=True)
-        self.assertEqual(device.device_type, torchtrt.DeviceType.DLA)
-        self.assertEqual(device.gpu_id, 0)
-        self.assertEqual(device.dla_core, 1)
-        self.assertEqual(device.allow_gpu_fallback, True)
-
-    def test_kwargs_gpu(self):
-        device = torchtrt.Device(gpu_id=0)
-        self.assertEqual(device.device_type, torchtrt.DeviceType.GPU)
-        self.assertEqual(device.gpu_id, 0)
-
-    def test_kwargs_dla_and_settings(self):
-        device = torchtrt.Device(dla_core=1, allow_gpu_fallback=False)
-        self.assertEqual(device.device_type, torchtrt.DeviceType.DLA)
-        self.assertEqual(device.gpu_id, 0)
-        self.assertEqual(device.dla_core, 1)
-        self.assertEqual(device.allow_gpu_fallback, False)
-
-        device = torchtrt.Device(gpu_id=1, dla_core=0, allow_gpu_fallback=True)
-        self.assertEqual(device.device_type, torchtrt.DeviceType.DLA)
-        self.assertEqual(device.gpu_id, 1)
-        self.assertEqual(device.dla_core, 0)
-        self.assertEqual(device.allow_gpu_fallback, True)
-
-    def test_from_torch(self):
-        device = torchtrt.Device._from_torch_device(torch.device("cuda:0"))
-        self.assertEqual(device.device_type, torchtrt.DeviceType.GPU)
-        self.assertEqual(device.gpu_id, 0)
-
-
-class TestInput(unittest.TestCase):
-
-    def _verify_correctness(self, struct: torchtrt.Input, target: Dict) -> bool:
-        internal = struct._to_internal()
-
-        list_eq = lambda al, bl: all([a == b for (a, b) in zip(al, bl)])
-
-        eq = lambda a, b: a == b
-
-        def field_is_correct(field, equal_fn, a1, a2):
-            equal = equal_fn(a1, a2)
-            if not equal:
-                print("\nField {} is incorrect: {} != {}".format(field, a1, a2))
-            return equal
-
-        min_ = field_is_correct("min", list_eq, internal.min, target["min"])
-        opt_ = field_is_correct("opt", list_eq, internal.opt, target["opt"])
-        max_ = field_is_correct("max", list_eq, internal.max, target["max"])
-        is_dynamic_ = field_is_correct("is_dynamic", eq, internal.input_is_dynamic, target["input_is_dynamic"])
-        explicit_set_dtype_ = field_is_correct("explicit_dtype", eq, internal._explicit_set_dtype,
-                                               target["explicit_set_dtype"])
-        dtype_ = field_is_correct("dtype", eq, int(internal.dtype), int(target["dtype"]))
-        format_ = field_is_correct("format", eq, int(internal.format), int(target["format"]))
-
-        return all([min_, opt_, max_, is_dynamic_, explicit_set_dtype_, dtype_, format_])
-
-    def test_infer_from_example_tensor(self):
-        shape = [1, 3, 255, 255]
-        target = {
-            "min": shape,
-            "opt": shape,
-            "max": shape,
-            "input_is_dynamic": False,
-            "dtype": torchtrt.dtype.half,
-            "format": torchtrt.TensorFormat.contiguous,
-            "explicit_set_dtype": True
-        }
-
-        example_tensor = torch.randn(shape).half()
-        i = torchtrt.Input._from_tensor(example_tensor)
-        self.assertTrue(self._verify_correctness(i, target))
-
-    def test_static_shape(self):
-        shape = [1, 3, 255, 255]
-        target = {
-            "min": shape,
-            "opt": shape,
-            "max": shape,
-            "input_is_dynamic": False,
-            "dtype": torchtrt.dtype.unknown,
-            "format": torchtrt.TensorFormat.contiguous,
-            "explicit_set_dtype": False
-        }
-
-        i = torchtrt.Input(shape)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(tuple(shape))
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(torch.randn(shape).shape)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(shape=shape)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(shape=tuple(shape))
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(shape=torch.randn(shape).shape)
-        self.assertTrue(self._verify_correctness(i, target))
-
-    def test_data_type(self):
-        shape = [1, 3, 255, 255]
-        target = {
-            "min": shape,
-            "opt": shape,
-            "max": shape,
-            "input_is_dynamic": False,
-            "dtype": torchtrt.dtype.half,
-            "format": torchtrt.TensorFormat.contiguous,
-            "explicit_set_dtype": True
-        }
-
-        i = torchtrt.Input(shape, dtype=torchtrt.dtype.half)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(shape, dtype=torch.half)
-        self.assertTrue(self._verify_correctness(i, target))
-
-    def test_tensor_format(self):
-        shape = [1, 3, 255, 255]
-        target = {
-            "min": shape,
-            "opt": shape,
-            "max": shape,
-            "input_is_dynamic": False,
-            "dtype": torchtrt.dtype.unknown,
-            "format": torchtrt.TensorFormat.channels_last,
-            "explicit_set_dtype": False
-        }
-
-        i = torchtrt.Input(shape, format=torchtrt.TensorFormat.channels_last)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(shape, format=torch.channels_last)
-        self.assertTrue(self._verify_correctness(i, target))
-
-    def test_dynamic_shape(self):
-        min_shape = [1, 3, 128, 128]
-        opt_shape = [1, 3, 256, 256]
-        max_shape = [1, 3, 512, 512]
-        target = {
-            "min": min_shape,
-            "opt": opt_shape,
-            "max": max_shape,
-            "input_is_dynamic": True,
-            "dtype": torchtrt.dtype.unknown,
-            "format": torchtrt.TensorFormat.contiguous,
-            "explicit_set_dtype": False
-        }
-
-        i = torchtrt.Input(min_shape=min_shape, opt_shape=opt_shape, max_shape=max_shape)
-        self.assertTrue(self._verify_correctness(i, target))
-
-        i = torchtrt.Input(min_shape=tuple(min_shape), opt_shape=tuple(opt_shape), max_shape=tuple(max_shape))
-        self.assertTrue(self._verify_correctness(i, target))
-
-        tensor_shape = lambda shape: torch.randn(shape).shape
-        i = torchtrt.Input(min_shape=tensor_shape(min_shape),
-                           opt_shape=tensor_shape(opt_shape),
-                           max_shape=tensor_shape(max_shape))
-        self.assertTrue(self._verify_correctness(i, target))
-
-
-class TestModule(unittest.TestCase):
-
-    def test_module_type(self):
-        nn_module = models.alexnet(pretrained=True).eval().to("cuda")
-        ts_module = torch.jit.trace(nn_module, torch.ones([1, 3, 224, 224]).to("cuda"))
-        fx_module = torch.fx.symbolic_trace(nn_module)
-
-        self.assertEqual(torchtrt._compile._parse_module_type(nn_module), torchtrt._compile._ModuleType.nn)
-        self.assertEqual(torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts)
-        self.assertEqual(torchtrt._compile._parse_module_type(fx_module), torchtrt._compile._ModuleType.fx)
-
-
-def test_suite():
-    suite = unittest.TestSuite()
-    suite.addTest(unittest.makeSuite(TestLoggingAPIs))
-    suite.addTest(TestCompile.parametrize(TestCompile, model=models.resnet18(pretrained=True)))
-    # Disabling mobilenet_v2 test due to https://nvbugs/3433655
-    # suite.addTest(TestCompile.parametrize(TestCompile, model=models.mobilenet_v2(pretrained=True)))
-    suite.addTest(TestCompileHalf.parametrize(TestCompileHalf, model=models.resnet18(pretrained=True)))
-    suite.addTest(TestCompileHalfDefault.parametrize(TestCompileHalfDefault, model=models.resnet18(pretrained=True)))
-    suite.addTest(TestPTtoTRTtoPT.parametrize(TestPTtoTRTtoPT, model=models.resnet18(pretrained=True)))
-    suite.addTest(
-        TestInputTypeDefaultsFP32Model.parametrize(TestInputTypeDefaultsFP32Model,
-                                                   model=models.resnet18(pretrained=True)))
-    suite.addTest(
-        TestInputTypeDefaultsFP16Model.parametrize(TestInputTypeDefaultsFP16Model,
-                                                   model=models.resnet18(pretrained=True)))
-    suite.addTest(TestFallbackToTorch.parametrize(TestFallbackToTorch, model=models.resnet18(pretrained=True)))
-    suite.addTest(
-        TestModuleFallbackToTorch.parametrize(TestModuleFallbackToTorch, model=models.resnet18(pretrained=True)))
-    suite.addTest(unittest.makeSuite(TestCheckMethodOpSupport))
-    suite.addTest(unittest.makeSuite(TestDevice))
-    suite.addTest(unittest.makeSuite(TestInput))
-    suite.addTest(unittest.makeSuite(TestModule))
-
-    return suite
-
-
-suite = test_suite()
-
-runner = unittest.TextTestRunner()
-result = runner.run(suite)
-
-exit(int(not result.wasSuccessful()))
diff --git a/toolchains/ci_workspaces/WORKSPACE.sbsa b/toolchains/ci_workspaces/WORKSPACE.sbsa
new file mode 100644
index 0000000000..93cf5876a7
--- /dev/null
+++ b/toolchains/ci_workspaces/WORKSPACE.sbsa
@@ -0,0 +1,147 @@
+workspace(name = "Torch-TensorRT")
+
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
+
+http_archive(
+    name = "rules_python",
+    sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f",
+    url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz",
+)
+
+load("@rules_python//python:pip.bzl", "pip_install")
+
+http_archive(
+    name = "rules_pkg",
+    sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+        "https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz",
+    ],
+)
+
+load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")
+
+rules_pkg_dependencies()
+
+git_repository(
+    name = "googletest",
+    commit = "703bd9caab50b139428cea1aaff9974ebee5742e",
+    remote = "https://github.com/google/googletest",
+    shallow_since = "1570114335 -0400",
+)
+
+# External dependency for torch_tensorrt if you already have precompiled binaries.
+local_repository(
+    name = "torch_tensorrt",
+    path = "/opt/circleci/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch_tensorrt"
+)
+
+# CUDA should be installed on the system locally
+new_local_repository(
+    name = "cuda",
+    build_file = "@//third_party/cuda:BUILD",
+    path = "/usr/local/cuda/",
+)
+
+new_local_repository(
+    name = "cublas",
+    build_file = "@//third_party/cublas:BUILD",
+    path = "/usr",
+)
+#############################################################################################################
+# Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs)
+#############################################################################################################
+
+#http_archive(
+#    name = "libtorch",
+#    build_file = "@//third_party/libtorch:BUILD",
+#    sha256 = "8d9e829ce9478db4f35bdb7943308cf02e8a2f58cf9bb10f742462c1d57bf287",
+#    strip_prefix = "libtorch",
+#    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.11.0%2Bcu113.zip"],
+#)
+#
+#http_archive(
+#    name = "libtorch_pre_cxx11_abi",
+#    build_file = "@//third_party/libtorch:BUILD",
+#    sha256 = "90159ecce3ff451f3ef3f657493b6c7c96759c3b74bbd70c1695f2ea2f81e1ad",
+#    strip_prefix = "libtorch",
+#    urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.11.0%2Bcu113.zip"],
+#)
+
+# Download these tarballs manually from the NVIDIA website
+# Either place them in the distdir directory in third_party and use the --distdir flag
+# or modify the urls to "file:///<PATH_TO_TARBALL>.tar.gz"
+
+#http_archive(
+#    name = "cudnn",
+#    build_file = "@//third_party/cudnn/archive:BUILD",
+#    sha256 = "0e5d2df890b9967efa6619da421310d97323565a79f05a1a8cb9b7165baad0d7",
+#    strip_prefix = "cuda",
+#    urls = [
+#        "https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz",
+#    ],
+#)
+#
+#http_archive(
+#    name = "tensorrt",
+#    build_file = "@//third_party/tensorrt/archive:BUILD",
+#    sha256 = "826180eaaecdf9a7e76116855b9f1f3400ea9b06e66b06a3f6a0747ba6f863ad",
+#    strip_prefix = "TensorRT-8.2.4.2",
+#    urls = [
+#        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.4/tars/tensorrt-8.2.4.2.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
+#    ],
+#)
+
+####################################################################################
+# Locally installed dependencies (use in cases of custom dependencies or aarch64)
+####################################################################################
+
+# NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path
+# with your local libtorch, just point deps at the same path to satisfy bazel.
+
+# NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard
+# x86_64 python distribution. If using NVIDIA's version just point to the root of the package
+# for both versions here and do not use --config=pre-cxx11-abi
+
+new_local_repository(
+    name = "libtorch",
+    path = "/opt/circleci/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch",
+    build_file = "third_party/libtorch/BUILD"
+)
+
+new_local_repository(
+    name = "libtorch_pre_cxx11_abi",
+    path = "/opt/circleci/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch",
+    build_file = "third_party/libtorch/BUILD"
+)
+
+new_local_repository(
+    name = "cudnn",
+    path = "/usr/",
+    build_file = "@//third_party/cudnn/local:BUILD"
+)
+
+new_local_repository(
+    name = "tensorrt",
+    path = "/usr/",
+    build_file = "@//third_party/tensorrt/local:BUILD"
+)
+
+# #########################################################################
+# # Testing Dependencies (optional - comment out on aarch64)
+# #########################################################################
+# pip_install(
+#     name = "torch_tensorrt_py_deps",
+#     requirements = "//py:requirements.txt",
+# )
+
+# pip_install(
+#     name = "py_test_deps",
+#     requirements = "//tests/py:requirements.txt",
+# )
+
+pip_install(
+    name = "pylinter_deps",
+    requirements = "//tools/linter:requirements.txt",
+)
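Both CI workspaces hard-code the CircleCI pyenv site-packages path for the libtorch and torch_tensorrt local repositories. A quick sketch for finding the equivalent path in another environment (not part of the diff itself):

```python
import os
import torch

# Directory to point the libtorch new_local_repository entries at,
# e.g. .../lib/python3.x/site-packages/torch
print(os.path.dirname(torch.__file__))
```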
"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz", +# ], +#) +# +#http_archive( +# name = "tensorrt", +# build_file = "@//third_party/tensorrt/archive:BUILD", +# sha256 = "826180eaaecdf9a7e76116855b9f1f3400ea9b06e66b06a3f6a0747ba6f863ad", +# strip_prefix = "TensorRT-8.2.4.2", +# urls = [ +# "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.4/tars/tensorrt-8.2.4.2.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz", +# ], +#) + +#################################################################################### +# Locally installed dependencies (use in cases of custom dependencies or aarch64) +#################################################################################### + +# NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path +# with your local libtorch, just point deps at the same path to satisfy bazel. + +# NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard +# x86_64 python distribution. If using NVIDIA's version just point to the root of the package +# for both versions here and do not use --config=pre-cxx11-abi + +new_local_repository( + name = "libtorch", + path = "/opt/circleci/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch", + build_file = "third_party/libtorch/BUILD" +) + +new_local_repository( + name = "libtorch_pre_cxx11_abi", + path = "/opt/circleci/.pyenv/versions/3.8.10/lib/python3.8/site-packages/torch", + build_file = "third_party/libtorch/BUILD" +) + +new_local_repository( + name = "cudnn", + path = "/usr/", + build_file = "@//third_party/cudnn/local:BUILD" +) + +new_local_repository( + name = "tensorrt", + path = "/usr/", + build_file = "@//third_party/tensorrt/local:BUILD" +) + +# ######################################################################### +# # Testing Dependencies (optional - comment out on aarch64) +# ######################################################################### +# pip_install( +# name = "torch_tensorrt_py_deps", +# requirements = "//py:requirements.txt", +# ) + +# pip_install( +# name = "py_test_deps", +# requirements = "//tests/py:requirements.txt", +# ) + +pip_install( + name = "pylinter_deps", + requirements = "//tools/linter:requirements.txt", +) diff --git a/WORKSPACE.ci b/toolchains/ci_workspaces/WORKSPACE.x86_64 similarity index 98% rename from WORKSPACE.ci rename to toolchains/ci_workspaces/WORKSPACE.x86_64 index 1eeb75ea7d..00f8efc5df 100644 --- a/WORKSPACE.ci +++ b/toolchains/ci_workspaces/WORKSPACE.x86_64 @@ -34,7 +34,7 @@ git_repository( # External dependency for torch_tensorrt if you already have precompiled binaries. local_repository( name = "torch_tensorrt", - path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt" + path = "/opt/circleci/.pyenv/versions/3.9.4/lib/python3.9/site-packages/torch_tensorrt" ) # CUDA should be installed on the system locally