diff --git a/.github/workflows/build-conda-images.yml b/.github/workflows/build-conda-images.yml
index 8104f8866..e92407e8f 100644
--- a/.github/workflows/build-conda-images.yml
+++ b/.github/workflows/build-conda-images.yml
@@ -26,7 +26,7 @@ jobs:
     runs-on: linux.2xlarge
     strategy:
       matrix:
-        cuda_version: ["11.6", "11.7", "11.8", "cpu"]
+        cuda_version: ["11.6", "11.7", "11.8", "12.1", "cpu"]
     env:
       CUDA_VERSION: ${{ matrix.cuda_version }}
     steps:
diff --git a/.github/workflows/build-libtorch-images.yml b/.github/workflows/build-libtorch-images.yml
index d32b6e4b0..e1446f8e2 100644
--- a/.github/workflows/build-libtorch-images.yml
+++ b/.github/workflows/build-libtorch-images.yml
@@ -28,7 +28,7 @@ jobs:
     runs-on: ubuntu-18.04
     strategy:
       matrix:
-        cuda_version: ["11.8", "11.7", "11.6"]
+        cuda_version: ["12.1", "11.8", "11.7", "11.6"]
     env:
       GPU_ARCH_TYPE: cuda
       GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
diff --git a/.github/workflows/build-manywheel-images.yml b/.github/workflows/build-manywheel-images.yml
index bbd221989..697b3642e 100644
--- a/.github/workflows/build-manywheel-images.yml
+++ b/.github/workflows/build-manywheel-images.yml
@@ -30,7 +30,7 @@ jobs:
     runs-on: ubuntu-18.04
     strategy:
       matrix:
-        cuda_version: ["11.8", "11.7", "11.6"]
+        cuda_version: ["12.1", "11.8", "11.7", "11.6"]
     env:
       GPU_ARCH_TYPE: cuda
       GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
diff --git a/common/install_cuda.sh b/common/install_cuda.sh
index 359df5b3b..87f4ab620 100644
--- a/common/install_cuda.sh
+++ b/common/install_cuda.sh
@@ -85,6 +85,37 @@ function install_118 {
     ldconfig
 }
 
+function install_121 {
+    echo "Installing CUDA 12.1 and cuDNN 8.8 and NCCL 2.16"
+    rm -rf /usr/local/cuda-12.1 /usr/local/cuda
+    # install CUDA 12.1.0 in the same container
+    wget -q https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run
+    chmod +x cuda_12.1.0_530.30.02_linux.run
+    ./cuda_12.1.0_530.30.02_linux.run --toolkit --silent
+    rm -f cuda_12.1.0_530.30.02_linux.run
+    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda
+
+    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
+    mkdir tmp_cudnn && cd tmp_cudnn
+    wget -q https://developer.download.nvidia.com/compute/redist/cudnn/v8.8.0/local_installers/12.0/cudnn-linux-x86_64-8.8.0.121_cuda12-archive.tar.xz -O cudnn-linux-x86_64-8.8.0.121_cuda12-archive.tar.xz
+    tar xf cudnn-linux-x86_64-8.8.0.121_cuda12-archive.tar.xz
+    cp -a cudnn-linux-x86_64-8.8.0.121_cuda12-archive/include/* /usr/local/cuda/include/
+    cp -a cudnn-linux-x86_64-8.8.0.121_cuda12-archive/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_cudnn
+    ldconfig
+
+    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
+    mkdir tmp_nccl && cd tmp_nccl
+    wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.17.1/nccl_2.17.1-1+cuda12.1_x86_64.txz
+    tar xf nccl_2.17.1-1+cuda12.1_x86_64.txz
+    cp -a nccl_2.17.1-1+cuda12.1_x86_64/include/* /usr/local/cuda/include/
+    cp -a nccl_2.17.1-1+cuda12.1_x86_64/lib/* /usr/local/cuda/lib64/
+    cd ..
+    rm -rf tmp_nccl
+    ldconfig
+}
+
 function prune_116 {
     echo "Pruning CUDA 11.6 and CuDNN"
     #####################################################################################
@@ -178,6 +209,38 @@ function prune_118 {
     rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
 }
 
+
+function prune_121 {
+    echo "Pruning CUDA 12.1 and cuDNN"
+    #####################################################################################
+    # CUDA 12.1 prune static libs
+    #####################################################################################
+    export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
+    export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
+
+    export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+    export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
+
+    if [[ -n "$OVERRIDE_GENCODE" ]]; then
+        export GENCODE=$OVERRIDE_GENCODE
+    fi
+
+    # all CUDA libs except CuDNN and CuBLAS (cudnn and cublas need arch 3.7 included)
+    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
+      | xargs -I {} bash -c \
+                "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
+
+    # prune CuDNN and CuBLAS
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
+    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
+
+    #####################################################################################
+    # CUDA 12.1 prune visual tools
+    #####################################################################################
+    export CUDA_BASE="/usr/local/cuda-12.1/"
+    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.4.1.6 $CUDA_BASE/nsight-systems-2022.4.2.50/
+}
+
 # idiomatic parameter and option handling in sh
 while test $# -gt 0
 do
 case "$1" in
@@ -188,6 +251,8 @@ do
         ;;
     11.8) install_118; prune_118
         ;;
+    12.1) install_121; prune_121
+        ;;
     *) echo "bad argument $1"; exit 1
         ;;
     esac
diff --git a/conda/Dockerfile b/conda/Dockerfile
index c65e1ad99..07af9d055 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -60,6 +60,10 @@ FROM cuda as cuda11.8
 RUN bash ./install_cuda.sh 11.8
 ENV DESIRED_CUDA=11.8
 
+FROM cuda as cuda12.1
+RUN bash ./install_cuda.sh 12.1
+ENV DESIRED_CUDA=12.1
+
 # Install MNIST test data
 FROM base as mnist
 ADD ./common/install_mnist.sh install_mnist.sh
@@ -69,6 +73,7 @@ FROM base as all_cuda
 COPY --from=cuda11.6 /usr/local/cuda-11.6 /usr/local/cuda-11.6
 COPY --from=cuda11.7 /usr/local/cuda-11.7 /usr/local/cuda-11.7
 COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
+COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
 
 FROM ${BASE_TARGET} as final
 # Install LLVM
diff --git a/conda/build_all_docker.sh b/conda/build_all_docker.sh
index 1dc5ffe4f..dcabde764 100755
--- a/conda/build_all_docker.sh
+++ b/conda/build_all_docker.sh
@@ -4,6 +4,6 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for CUDA_VERSION in 11.8 11.7 11.6 cpu; do
+for CUDA_VERSION in 12.1 11.8 11.7 11.6 cpu; do
   CUDA_VERSION="${CUDA_VERSION}" conda/build_docker.sh
 done
diff --git a/conda/build_pytorch.sh b/conda/build_pytorch.sh
index 998e72807..bfa92d5e8 100755
--- a/conda/build_pytorch.sh
+++ b/conda/build_pytorch.sh
@@ -265,7 +265,10 @@ else
     . ./switch_cuda_version.sh "$desired_cuda"
     # TODO, simplify after anaconda fixes their cudatoolkit versioning inconsistency.
     # see: https://github.com/conda-forge/conda-forge.github.io/issues/687#issuecomment-460086164
-    if [[ "$desired_cuda" == "11.8" ]]; then
+    if [[ "$desired_cuda" == "12.1" ]]; then
+        export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=12.1,<12.2 # [not osx]"
+        export MAGMA_PACKAGE="    - magma-cuda121 # [not osx and not win]"
+    elif [[ "$desired_cuda" == "11.8" ]]; then
         export CONDA_CUDATOOLKIT_CONSTRAINT="    - pytorch-cuda >=11.8,<11.9 # [not osx]"
         export MAGMA_PACKAGE="    - magma-cuda118 # [not osx and not win]"
     elif [[ "$desired_cuda" == "11.7" ]]; then
diff --git a/conda/pytorch-cuda/conda_build_config.yaml b/conda/pytorch-cuda/conda_build_config.yaml
index 67d14f2b1..93a2bb22d 100644
--- a/conda/pytorch-cuda/conda_build_config.yaml
+++ b/conda/pytorch-cuda/conda_build_config.yaml
@@ -2,6 +2,7 @@ version:
   - 11.6
   - 11.7
   - 11.8
+  - 12.1
 target_platform:
   - win-64
   - linux-64
diff --git a/conda/pytorch-cuda/meta.yaml b/conda/pytorch-cuda/meta.yaml
index ecb438ca8..8744739cb 100644
--- a/conda/pytorch-cuda/meta.yaml
+++ b/conda/pytorch-cuda/meta.yaml
@@ -35,6 +35,14 @@
 {% set libcusparse_constraints=">=11.7.5.86,<12.0.0.76" %}
 {% set libnpp_constraints=">=11.8.0.86,<12.0.0.30" %}
 {% set libnvjpeg_constraints=">=11.9.0.86,<12.0.0.28" %}
+{% elif version == '12.1' %}
+{% set cuda_constraints=">=12.1,<12.2" %}
+{% set libcufft_constraints=">=11.0.1.95,<=11.0.2.4" %}
+{% set libcublas_constraints=">=12.0.2.224,<=12.1.0.26" %}
+{% set libcusolver_constraints=">=11.4.3.1,<=11.4.4.55" %}
+{% set libcusparse_constraints=">=12.0.1.140,<=12.0.2.55" %}
+{% set libnpp_constraints=">=12.0.1.104,<=12.0.2.50" %}
+{% set libnvjpeg_constraints=">=12.0.1.102,<=12.1.0.39" %}
 {% endif %}
 
 package:
diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat
index 18850f758..1bd95de11 100644
--- a/conda/pytorch-nightly/bld.bat
+++ b/conda/pytorch-nightly/bld.bat
@@ -35,6 +35,11 @@ if "%desired_cuda%" == "11.8" (
     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
 )
 
+if "%desired_cuda%" == "12.1" (
+    set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;9.0
+    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2
+)
+
 :cuda_flags_end
 
 set DISTUTILS_USE_SDK=1
diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh
index ad1871ac4..c431db96e 100755
--- a/conda/pytorch-nightly/build.sh
+++ b/conda/pytorch-nightly/build.sh
@@ -70,12 +70,19 @@ if [[ -n "$build_with_cuda" ]]; then
     #for cuda 11.7 include all dynamic loading libraries
     DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.7/extras/CUPTI/lib64/libcupti.so.11.7)
   elif [[ $CUDA_VERSION == 11.8* ]]; then
+    TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0"
+    #for cuda 11.8 we use cudnn 8.7
+    #which does not have single static libcudnn_static.a deliverable to link with
+    export USE_STATIC_CUDNN=0
+    #for cuda 11.8 include all dynamic loading libraries
+    DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8)
+  elif [[ $CUDA_VERSION == 12.1* ]]; then
     TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0"
-    #for cuda 11.8 we use cudnn 8.7
+    #for cuda 12.1 we use cudnn 8.8
     #which does not have single static libcudnn_static.a deliverable to link with
     export USE_STATIC_CUDNN=0
-    #for cuda 11.8 include all dynamic loading libraries
-    DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8)
+    #for cuda 12.1 include all dynamic loading libraries
+    DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-12.1/extras/CUPTI/lib64/libcupti.so.12.1)
   fi
   if [[ -n "$OVERRIDE_TORCH_CUDA_ARCH_LIST" ]]; then
     TORCH_CUDA_ARCH_LIST="$OVERRIDE_TORCH_CUDA_ARCH_LIST"
diff --git a/libtorch/Dockerfile b/libtorch/Dockerfile
index 3a116f8b5..18d821917 100644
--- a/libtorch/Dockerfile
+++ b/libtorch/Dockerfile
@@ -56,6 +56,10 @@ FROM cuda as cuda11.8
 RUN bash ./install_cuda.sh 11.8
 RUN bash ./install_magma.sh 11.8
 
+FROM cuda as cuda12.1
+RUN bash ./install_cuda.sh 12.1
+RUN bash ./install_magma.sh 12.1
+
 FROM cpu as rocm
 ARG PYTORCH_ROCM_ARCH
 ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
diff --git a/libtorch/build_all_docker.sh b/libtorch/build_all_docker.sh
index 8d25da9bc..45a770e24 100755
--- a/libtorch/build_all_docker.sh
+++ b/libtorch/build_all_docker.sh
@@ -4,7 +4,7 @@ set -eou pipefail
 
 TOPDIR=$(git rev-parse --show-toplevel)
 
-for cuda_version in 11.8 11.7 11.6; do
+for cuda_version in 12.1 11.8 11.7 11.6; do
   GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION="${cuda_version}" "${TOPDIR}/libtorch/build_docker.sh"
 done
diff --git a/magma/Makefile b/magma/Makefile
index 4a90a43e2..5bcd56eb4 100644
--- a/magma/Makefile
+++ b/magma/Makefile
@@ -23,6 +23,13 @@ clean:
 	$(RM) -r magma-*
 	$(RM) -r output
 
+.PHONY: magma-cuda121
+magma-cuda121: DESIRED_CUDA := 12.1
+magma-cuda121: PACKAGE_NAME := magma-cuda121
+magma-cuda121: CUDA_ARCH_LIST += -gencode arch=compute_90,code=sm_90
+magma-cuda121:
+	$(DOCKER_RUN)
+
 .PHONY: magma-cuda118
 magma-cuda118: DESIRED_CUDA := 11.8
 magma-cuda118: PACKAGE_NAME := magma-cuda118
diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh
index bd04cdd60..908b55691 100644
--- a/manywheel/build_cuda.sh
+++ b/manywheel/build_cuda.sh
@@ -58,6 +58,10 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
 
 TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0"
 case ${CUDA_VERSION} in
+    12.1)
+        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0"
+        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
+        ;;
     11.8)
         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0"
         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
@@ -142,7 +146,7 @@ DEPS_SONAME=(
     "libcublasLt.so.11"
     "libgomp.so.1"
 )
-elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
+elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.1" ]]; then
     export USE_STATIC_CUDNN=0
     # Try parallelizing nvcc as well
     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
@@ -199,6 +203,14 @@ elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
             "libnvrtc-builtins.so.11.8"
         )
     fi
+    if [[ $CUDA_VERSION == "12.1" ]]; then
+        DEPS_LIST+=(
+            "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.1"
+        )
+        DEPS_SONAME+=(
+            "libnvrtc-builtins.so.12.1"
+        )
+    fi
 else
     echo "Using nvidia libs from pypi."
     CUDA_RPATHS=(
diff --git a/windows/cuda121.bat b/windows/cuda121.bat
new file mode 100644
index 000000000..5654ab551
--- /dev/null
+++ b/windows/cuda121.bat
@@ -0,0 +1,58 @@
+@echo off
+
+set MODULE_NAME=pytorch
+
+IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
+    call internal\clone.bat
+    cd ..
+) ELSE (
+    call internal\clean.bat
+)
+IF ERRORLEVEL 1 goto :eof
+
+call internal\check_deps.bat
+IF ERRORLEVEL 1 goto :eof
+
+REM Check for optional components
+
+set USE_CUDA=
+set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
+
+IF "%NVTOOLSEXT_PATH%"=="" (
+    IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
+        set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
+    ) ELSE (
+        echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
+        exit /b 1
+    )
+)
+
+IF "%CUDA_PATH_V121%"=="" (
+    IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin\nvcc.exe" (
+        set "CUDA_PATH_V121=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1"
+    ) ELSE (
+        echo CUDA 12.1 not found, failing
+        exit /b 1
+    )
+)
+
+IF "%BUILD_VISION%" == "" (
+    set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
+    set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
+) ELSE (
+    set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
+)
+
+set "CUDA_PATH=%CUDA_PATH_V121%"
+set "PATH=%CUDA_PATH_V121%\bin;%PATH%"
+
+:optcheck
+
+call internal\check_opts.bat
+IF ERRORLEVEL 1 goto :eof
+
+call internal\copy.bat
+IF ERRORLEVEL 1 goto :eof
+
+call internal\setup.bat
+IF ERRORLEVEL 1 goto :eof
diff --git a/windows/internal/cuda_install.bat b/windows/internal/cuda_install.bat
index b4f11a58a..ebde9ff34 100644
--- a/windows/internal/cuda_install.bat
+++ b/windows/internal/cuda_install.bat
@@ -22,6 +22,7 @@ if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%
 if %CUDA_VER% EQU 116 goto cuda116
 if %CUDA_VER% EQU 117 goto cuda117
 if %CUDA_VER% EQU 118 goto cuda118
+if %CUDA_VER% EQU 121 goto cuda121
 
 echo CUDA %CUDA_VERSION_STR% is not supported
 exit /b 1
@@ -105,6 +106,33 @@ curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%
 7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib"
 xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
 
+goto cuda_common
+
+:cuda121
+
+set CUDA_INSTALL_EXE=cuda_12.1.0_531.14_windows.exe
+if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
+    curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
+    if errorlevel 1 exit /b 1
+    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
+    set "ARGS=cuda_profiler_api_12.1 thrust_12.1 nvcc_12.1 cuobjdump_12.1 nvprune_12.1 nvprof_12.1 cupti_12.1 cublas_12.1 cublas_dev_12.1 cudart_12.1 cufft_12.1 cufft_dev_12.1 curand_12.1 curand_dev_12.1 cusolver_12.1 cusolver_dev_12.1 cusparse_12.1 cusparse_dev_12.1 npp_12.1 npp_dev_12.1 nvrtc_12.1 nvrtc_dev_12.1 nvml_dev_12.1"
+)
+
+set CUDNN_FOLDER=cudnn-windows-x86_64-8.8.0.121_cuda12-archive
+set CUDNN_LIB_FOLDER="lib"
+set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip"
+if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
+    curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
+    if errorlevel 1 exit /b 1
+    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
+)
+
+@REM Cuda 8.3+ required zlib to be installed on the path
+echo Installing ZLIB dlls
+curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip"
+7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib"
+xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
+
 goto cuda_common
 
 :cuda_common
diff --git a/windows/internal/smoke_test.bat b/windows/internal/smoke_test.bat
index 49ff5e6ca..bb4974197 100644
--- a/windows/internal/smoke_test.bat
+++ b/windows/internal/smoke_test.bat
@@ -74,6 +74,9 @@ if "%CUDA_VERSION%" == "117" (
 if "%CUDA_VERSION%" == "118" (
     set "CONDA_EXTRA_ARGS=pytorch-cuda=11.8 -c nvidia -c pytorch-nightly"
 )
+if "%CUDA_VERSION%" == "121" (
+    set "CONDA_EXTRA_ARGS=pytorch-cuda=12.1 -c nvidia -c pytorch-nightly"
+)
 
 rmdir /s /q conda
 del miniconda.exe