From 346fafbe6a31befa35701fcbfa14eeda48d79381 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 14:08:08 -0400 Subject: [PATCH 1/8] Build dind --- docker/gcp-a100-runner-dind.dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 98f012def7..c4cf0ca4e5 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -1,6 +1,10 @@ # default base image: ghcr.io/actions/actions-runner:latest # base image: Ubuntu 22.04 jammy ARG BASE_IMAGE=ghcr.io/actions/actions-runner:latest +# Prune CUDA to only keep gencode >= A100 +ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" +ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" + FROM ${BASE_IMAGE} ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 From a0bba7a4d5ba8b23bd27a322999978715f7ed626 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 14:48:07 -0400 Subject: [PATCH 2/8] Another try --- docker/gcp-a100-runner-dind.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index c4cf0ca4e5..af159ccef4 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -34,7 +34,7 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace # Use the CUDA installation scripts from pytorch/builder # Install CUDA 12.4 only to reduce docker size RUN cd /workspace; git clone https://github.com/pytorch/builder.git -RUN sudo bash -c 'source /workspace/builder/common/install_cuda.sh; install_124; prune_124' +RUN sudo bash -c "source /workspace/builder/common/install_cuda.sh; install_124; OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\" prune_124" # Install miniconda RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh From 3e013c1ed4ff8c8075b1661ae505718e843ebab1 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 16:08:44 -0400 Subject: [PATCH 3/8] Add debugging commands --- docker/gcp-a100-runner-dind.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index af159ccef4..6a881f25a7 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -34,7 +34,7 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace # Use the CUDA installation scripts from pytorch/builder # Install CUDA 12.4 only to reduce docker size RUN cd /workspace; git clone https://github.com/pytorch/builder.git -RUN sudo bash -c "source /workspace/builder/common/install_cuda.sh; install_124; OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\" prune_124" +RUN sudo bash -c "set -x; source /workspace/builder/common/install_cuda.sh; install_124; export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; prune_124" # Install miniconda RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh From 0e2caeb4fe98c926ebb5897bc9f157a6447c9aac Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 16:36:16 -0400 Subject: [PATCH 4/8] Another try --- docker/gcp-a100-runner-dind.dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 6a881f25a7..72aff014f7 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -2,8 +2,8 @@ # base image: Ubuntu 22.04 jammy ARG BASE_IMAGE=ghcr.io/actions/actions-runner:latest # Prune CUDA to only keep gencode >= A100 -ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" -ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" +ENV OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" +ENV OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" FROM ${BASE_IMAGE} From 29953be4ca3009f893baa116292b9f37d16846cc Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 16:59:55 -0400 Subject: [PATCH 5/8] Add args --- docker/gcp-a100-runner-dind.dockerfile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 72aff014f7..59c3cea997 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -1,13 +1,12 @@ # default base image: ghcr.io/actions/actions-runner:latest # base image: Ubuntu 22.04 jammy -ARG BASE_IMAGE=ghcr.io/actions/actions-runner:latest # Prune CUDA to only keep gencode >= A100 -ENV OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" -ENV OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" - +ARG BASE_IMAGE=ghcr.io/actions/actions-runner:latest FROM ${BASE_IMAGE} ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 +ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" +ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" RUN sudo apt-get -y update && sudo apt -y update # fontconfig: required by model doctr_det_predictor From 62183360a6f08d0fb9be2da5d2be9d5cacd20cf7 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 17:00:41 -0400 Subject: [PATCH 6/8] Add test --- docker/gcp-a100-runner-dind.dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 59c3cea997..93f203d024 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -8,6 +8,9 @@ ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" +RUN echo "${OVERRIDE_GENCODE}" +RUN echo "${OVERRIDE_GENCODE_CUDNN}" + RUN sudo apt-get -y update && sudo apt -y update # fontconfig: required by model doctr_det_predictor # libjpeg and libpng: optionally required by torchvision (vision#8342) From 48644b6e434d27511c802a54a95e92d8f46f7923 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 18:06:36 -0400 Subject: [PATCH 7/8] Another test --- docker/gcp-a100-runner-dind.dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 93f203d024..5d39eb686c 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -8,9 +8,6 @@ ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a" -RUN echo "${OVERRIDE_GENCODE}" -RUN echo "${OVERRIDE_GENCODE_CUDNN}" - RUN sudo apt-get -y update && sudo apt -y update # fontconfig: required by model doctr_det_predictor # libjpeg and libpng: optionally required by torchvision (vision#8342) @@ -36,7 +33,7 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace # Use the CUDA installation scripts from pytorch/builder # Install CUDA 12.4 only to reduce docker size RUN cd /workspace; git clone https://github.com/pytorch/builder.git -RUN sudo bash -c "set -x; source /workspace/builder/common/install_cuda.sh; install_124; export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; prune_124" +RUN sudo bash -c "set -x; source /workspace/builder/common/install_cuda.sh; install_124; prune_124" # Install miniconda RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh From eec8b0ad98a668b88979b3e44716234e05a52410 Mon Sep 17 00:00:00 2001 From: Xu Zhao Date: Thu, 27 Jun 2024 18:30:28 -0400 Subject: [PATCH 8/8] Export envs --- docker/gcp-a100-runner-dind.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/gcp-a100-runner-dind.dockerfile b/docker/gcp-a100-runner-dind.dockerfile index 5d39eb686c..59c3cea997 100644 --- a/docker/gcp-a100-runner-dind.dockerfile +++ b/docker/gcp-a100-runner-dind.dockerfile @@ -33,7 +33,7 @@ RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace # Use the CUDA installation scripts from pytorch/builder # Install CUDA 12.4 only to reduce docker size RUN cd /workspace; git clone https://github.com/pytorch/builder.git -RUN sudo bash -c "set -x; source /workspace/builder/common/install_cuda.sh; install_124; prune_124" +RUN sudo bash -c "set -x; source /workspace/builder/common/install_cuda.sh; install_124; export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; prune_124" # Install miniconda RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh