Add smoke tests conv,linalg,compile. And better version check. (#1333)

atalman · web-flow · commit 28508a377d49 · 2023-03-07T18:07:14.000-05:00
* Add smoke tests conv,linalg,compile

* Add version check

* Fix typo

Fix version check

Add not

* Add exception for python 3.11

* fix typo

* Try to exit after CUDA Runtime exception

* Restrict crash test only to conda

* Restrict crash test only to conda

* Fix tests

* Turn off cuda runtime issue

* tests

* more tests

* test

* remove compile step

* test

* disable some of the tests

* testing

* Remove extra index url

* test

* Fix tests

* Additional smoke tests

Remove release blocking changes
diff --git a/.github/scripts/validate_binaries.sh b/.github/scripts/validate_binaries.sh
@@ -17,9 +17,11 @@ else
         conda env remove -n ${ENV_NAME}
     else
 
+
+
         # Special case Pypi installation package, only applicable to linux nightly CUDA 11.7 builds, wheel package
-        if [[ ${TARGET_OS} == 'linux' && ${MATRIX_CHANNEL} == 'nightly' && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
-            conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy
+        if [[ ${TARGET_OS} == 'linux'  && ${MATRIX_GPU_ARCH_VERSION} == '11.7' && ${MATRIX_PACKAGE_TYPE} == 'manywheel' ]]; then
+            conda create -yp ${ENV_NAME}_pypi python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
             INSTALLATION_PYPI=${MATRIX_INSTALLATION/"cu117"/"cu117_pypi_cudnn"}
             INSTALLATION_PYPI=${INSTALLATION_PYPI/"torchvision torchaudio"/""}
             INSTALLATION_PYPI=${INSTALLATION_PYPI/"index-url"/"extra-index-url"}
@@ -29,9 +31,11 @@ else
             conda env remove -p ${ENV_NAME}_pypi
         fi
 
-        conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy pillow
+        # Please note ffmpeg is required for torchaudio, see https://github.com/pytorch/pytorch/issues/96159
+        conda create -y -n ${ENV_NAME} python=${MATRIX_PYTHON_VERSION} numpy ffmpeg
         conda activate ${ENV_NAME}
         INSTALLATION=${MATRIX_INSTALLATION/"conda install"/"conda install -y"}
+        INSTALLATION=${INSTALLATION/"extra-index-url"/"index-url"}
         eval $INSTALLATION
 
         if [[ ${TARGET_OS} == 'linux' ]]; then
diff --git a/test/smoke_test/smoke_test.py b/test/smoke_test/smoke_test.py
@@ -10,8 +10,10 @@
 
 gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION")
 gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")
-# use installation env variable to tell if it is nightly channel
-installation_str = os.getenv("MATRIX_INSTALLATION")
+channel = os.getenv("MATRIX_CHANNEL")
+stable_version = os.getenv("MATRIX_STABLE_VERSION")
+package_type = os.getenv("MATRIX_PACKAGE_TYPE")
+
 is_cuda_system = gpu_arch_type == "cuda"
 SCRIPT_DIR = Path(__file__).parent
 NIGHTLY_ALLOWED_DELTA = 3
@@ -31,6 +33,16 @@
     },
 ]
 
+def check_version(package: str) -> None:
+    # only makes sense to check nightly package where dates are known
+    if channel == "nightly":
+        check_nightly_binaries_date(options.package)
+    else:
+        if not torch.__version__.startswith(stable_version):
+            raise RuntimeError(
+                f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
+            )
+
 def check_nightly_binaries_date(package: str) -> None:
     from datetime import datetime, timedelta
     format_dt = '%Y%m%d'
@@ -58,6 +70,7 @@ def check_nightly_binaries_date(package: str) -> None:
 def test_cuda_runtime_errors_captured() -> None:
     cuda_exception_missed=True
     try:
+        print("Testing test_cuda_runtime_errors_captured")
         torch._assert_async(torch.tensor(0, device="cuda"))
         torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
     except RuntimeError as e:
@@ -95,29 +108,73 @@ def smoke_test_cuda(package: str) -> None:
         print(f"torch cudnn: {torch.backends.cudnn.version()}")
         print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")
 
-        # This check has to be run last, since its messing up CUDA runtime
+        # torch.compile is available only on Linux and python 3.8-3.10
+        if (sys.platform == "linux" or sys.platform == "linux2") and sys.version_info < (3, 11, 0):
+            smoke_test_compile()
+
         test_cuda_runtime_errors_captured()
 
 
 def smoke_test_conv2d() -> None:
     import torch.nn as nn
 
-    print("Calling smoke_test_conv2d")
+    print("Testing smoke_test_conv2d")
     # With square kernels and equal stride
     m = nn.Conv2d(16, 33, 3, stride=2)
     # non-square kernels and unequal stride and with padding
     m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
     # non-square kernels and unequal stride and with padding and dilation
-    m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
+    basic_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
     input = torch.randn(20, 16, 50, 100)
-    output = m(input)
+    output = basic_conv(input)
+
     if is_cuda_system:
         print("Testing smoke_test_conv2d with cuda")
         conv = nn.Conv2d(3, 3, 3).cuda()
         x = torch.randn(1, 3, 24, 24).cuda()
         with torch.cuda.amp.autocast():
             out = conv(x)
 
+        supported_dtypes = [torch.float16, torch.float32, torch.float64]
+        for dtype in supported_dtypes:
+            print(f"Testing smoke_test_conv2d with cuda for {dtype}")
+            conv = basic_conv.to(dtype).cuda()
+            input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
+            output = conv(input)
+
+def smoke_test_linalg() -> None:
+    print("Testing smoke_test_linalg")
+    A = torch.randn(5, 3)
+    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
+    U.shape, S.shape, Vh.shape
+    torch.dist(A, U @ torch.diag(S) @ Vh)
+
+    U, S, Vh = torch.linalg.svd(A)
+    U.shape, S.shape, Vh.shape
+    torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh)
+
+    A = torch.randn(7, 5, 3)
+    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
+    torch.dist(A, U @ torch.diag_embed(S) @ Vh)
+
+    if is_cuda_system:
+        supported_dtypes = [torch.float32, torch.float64]
+        for dtype in supported_dtypes:
+            print(f"Testing smoke_test_linalg with cuda for {dtype}")
+            A = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
+            torch.linalg.svd(A)
+
+def smoke_test_compile() -> None:
+    supported_dtypes = [torch.float16, torch.float32, torch.float64]
+    def foo(x: torch.Tensor) -> torch.Tensor:
+        return torch.sin(x) + torch.cos(x)
+    for dtype in supported_dtypes:
+        print(f"Testing smoke_test_compile for {dtype}")
+        x = torch.rand(3, 3, device="cuda").type(dtype)
+        x_eager = foo(x)
+        x_pt2 = torch.compile(foo)(x)
+        print(torch.allclose(x_eager, x_pt2))
+
 
 def smoke_test_modules():
     for module in MODULES:
@@ -146,15 +203,13 @@ def main() -> None:
     )
     options = parser.parse_args()
     print(f"torch: {torch.__version__}")
+    check_version(options.package)
     smoke_test_conv2d()
+    smoke_test_linalg()
 
     if options.package == "all":
         smoke_test_modules()
 
-    # only makes sense to check nightly package where dates are known
-    if installation_str.find("nightly") != -1:
-        check_nightly_binaries_date(options.package)
-
     smoke_test_cuda(options.package)