From 16948568b651cfb94cc98589b7a608ff155459ba Mon Sep 17 00:00:00 2001
From: Pratik Raj <rajpratik71@gmail.com>
Date: Thu, 28 May 2020 10:48:32 +0530
Subject: [PATCH] optimize size and time using "--no-cache-dir"

Using the "--no-cache-dir" flag with pip install makes sure that packages
downloaded by pip are not cached on the system. This is a best practice
which makes sure packages are fetched from the repository instead of
from a local cache. Further, in the case of Docker containers, restricting
caching reduces the image size. In terms of stats, the saving depends on
the number of Python packages multiplied by their respective sizes, e.g.
for heavy packages with a lot of dependencies, not caching pip packages
reduces the size a lot.

Further, more detailed information can be found at

https://medium.com/sciforce/strategies-of-docker-images-optimization-2ca9cc5719b6
---
 README.md                               | 14 +++++++-------
 cluster/README.md                       |  2 +-
 cluster/base/Dockerfile                 |  4 ++--
 cluster/calibrator/Dockerfile           |  2 +-
 cluster/evaluator/Dockerfile-cc         |  2 +-
 cluster/evaluator/Dockerfile-py         |  6 +++---
 cluster/evaluator/Dockerfile-ringmaster |  2 +-
 cluster/minigui/Dockerfile              |  8 ++++----
 cluster/ringmaster/lz-Dockerfile        |  2 +-
 cluster/selfplay/Dockerfile-py          |  8 ++++----
 cluster/selfplay/Dockerfile-tpu         |  2 +-
 cluster/trainer/Dockerfile              |  2 +-
 minigui/README.md                       |  6 +++---
 minigui/edgetpu/install_requirements.sh |  2 +-
 ml_perf/README.md                       |  8 ++++----
 testing/Dockerfile.v2                   |  6 +++---
 16 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 5180be40d..0a16e721c 100644
--- a/README.md
+++ b/README.md
@@ -72,8 +72,8 @@ intro to python development and virtualenv usage. The instructions after this
 point haven't been tested in environments that are not using virtualenv.
 
 ```shell
-pip3 install virtualenv
-pip3 install virtualenvwrapper
+pip3 install --no-cache-dir virtualenv
+pip3 install --no-cache-dir virtualenvwrapper
 ```
 
 Install Bazel
@@ -91,15 +91,15 @@ Install TensorFlow
 First set up and enter your virtualenv and then the shared requirements:
 
 ```
-pip3 install -r requirements.txt
+pip3 install --no-cache-dir -r requirements.txt
 ```
 
 Then, you'll need to choose to install the GPU or CPU tensorflow requirements:
 
-- GPU: `pip3 install "tensorflow-gpu==1.15.0"`.
+- GPU: `pip3 install --no-cache-dir "tensorflow-gpu==1.15.0"`.
   - *Note*: You must install [CUDA 10.0](https://developer.nvidia.com/cuda-10.0-download-archive). for Tensorflow
     1.13.0+.
-- CPU: `pip3 install "tensorflow==1.15.0"`.
+- CPU: `pip3 install --no-cache-dir "tensorflow==1.15.0"`.
 
 Setting up the Environment
 --------------------------
@@ -460,7 +460,7 @@ git clone https://github.com/tensorflow/minigo
 cd minigo
 
 # Install virtualenv.
-pip3 install virtualenv virtualenvwrapper
+pip3 install --no-cache-dir virtualenv virtualenvwrapper
 
 # Create a virtual environment
 virtualenv -p /usr/bin/python3 --system-site-packages "${HOME}/.venvs/minigo"
@@ -470,7 +470,7 @@ source "${HOME}/.venvs/minigo/bin/activate"
 
 # Install Minigo dependencies (TensorFlow for Cloud TPU is already installed as
 # part of the VM image).
-pip install -r requirements.txt
+pip install --no-cache-dir -r requirements.txt
 
 # When training on a Cloud TPU, the training work directory must be on Google Cloud Storage.
 # You'll need to choose your own globally unique bucket name.
diff --git a/cluster/README.md b/cluster/README.md
index efe88b980..7705f1e05 100644
--- a/cluster/README.md
+++ b/cluster/README.md
@@ -20,7 +20,7 @@ Make sure you have the following command line tools:
 And the Python [kubernetes-client](https://github.com/kubernetes-client/python)
 
 ```
-pip install kubernetes
+pip install --no-cache-dir kubernetes
 ```
 
 Next, make sure you have a Google Cloud Project with GKE Enabled
diff --git a/cluster/base/Dockerfile b/cluster/base/Dockerfile
index fd4399fa2..ecbd75e63 100644
--- a/cluster/base/Dockerfile
+++ b/cluster/base/Dockerfile
@@ -78,9 +78,9 @@ COPY staging/cc/configure_tensorflow.sh cc/configure_tensorflow.sh
 ENV CC_OPT_FLAGS -march=ivybridge
 ADD staging/requirements.txt /app/requirements.txt
 
-RUN pip3 install --upgrade pip setuptools
+RUN pip3 install --no-cache-dir --upgrade pip setuptools
 #TODO: just install what we need (keras?) so changes to requirements.txt don't trigger TF rebuilds
-RUN pip3 install -r /app/requirements.txt
+RUN pip3 install --no-cache-dir -r /app/requirements.txt
 
 ENV TF_NEED_TENSORRT 0
 RUN cc/configure_tensorflow.sh
diff --git a/cluster/calibrator/Dockerfile b/cluster/calibrator/Dockerfile
index 05fe2ac57..bf7df43cb 100644
--- a/cluster/calibrator/Dockerfile
+++ b/cluster/calibrator/Dockerfile
@@ -1,7 +1,7 @@
 ARG PROJECT
 FROM gcr.io/$PROJECT/cc-base:latest
 
-RUN pip3 install tensorflow==1.15.0
+RUN pip3 install --no-cache-dir tensorflow==1.15.0
 WORKDIR /app
 
 ENV BOARD_SIZE="19"
diff --git a/cluster/evaluator/Dockerfile-cc b/cluster/evaluator/Dockerfile-cc
index 4cdb7d7e2..0dbbfea4a 100644
--- a/cluster/evaluator/Dockerfile-cc
+++ b/cluster/evaluator/Dockerfile-cc
@@ -8,7 +8,7 @@ RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
     apt-get update -y && apt-get install google-cloud-sdk -y
 
 RUN apt-get install python3 python3-pip -y
-RUN pip3 install absl-py
+RUN pip3 install --no-cache-dir absl-py
 
 COPY staging/ /app
 WORKDIR /app
diff --git a/cluster/evaluator/Dockerfile-py b/cluster/evaluator/Dockerfile-py
index 2df0f34b1..c56e5edf5 100644
--- a/cluster/evaluator/Dockerfile-py
+++ b/cluster/evaluator/Dockerfile-py
@@ -36,9 +36,9 @@ WORKDIR /app
 
 ADD staging/requirements.txt /app/requirements.txt
 
-RUN pip3 install --upgrade pip
-RUN pip3 install -r /app/requirements.txt
-RUN pip3 install "tensorflow-gpu==1.15.0"
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir -r /app/requirements.txt
+RUN pip3 install --no-cache-dir "tensorflow-gpu==1.15.0"
 
 ADD staging/ /app
 
diff --git a/cluster/evaluator/Dockerfile-ringmaster b/cluster/evaluator/Dockerfile-ringmaster
index 219f429ac..f080a34f9 100644
--- a/cluster/evaluator/Dockerfile-ringmaster
+++ b/cluster/evaluator/Dockerfile-ringmaster
@@ -8,7 +8,7 @@ RUN export CLOUD_SDK_REPO="cloud-sdk-$(lsb_release -c -s)" && \
 
 RUN apt-get install python3 python3-pip -y
 # TODO(AMJ): Get this to compile, determine base & pip requirementes
-RUN pip3 install absl-py
+RUN pip3 install --no-cache-dir absl-py
 
 COPY staging/ /app
 WORKDIR /app
diff --git a/cluster/minigui/Dockerfile b/cluster/minigui/Dockerfile
index 7641c20f5..eba2b3430 100644
--- a/cluster/minigui/Dockerfile
+++ b/cluster/minigui/Dockerfile
@@ -37,10 +37,10 @@ WORKDIR /app
 # Here for caching.
 ADD staging/requirements.txt /app/requirements.txt
 ADD staging/minigui/requirements.txt /app/minigui/requirements.txt
-RUN pip3 install --upgrade pip
-RUN pip3 install -r /app/requirements.txt
-RUN pip3 install -r /app/minigui/requirements.txt
-RUN pip3 install "tensorflow>=1.7,<1.8"
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir -r /app/requirements.txt
+RUN pip3 install --no-cache-dir -r /app/minigui/requirements.txt
+RUN pip3 install --no-cache-dir "tensorflow>=1.7,<1.8"
 
 ADD staging/ /app
 
diff --git a/cluster/ringmaster/lz-Dockerfile b/cluster/ringmaster/lz-Dockerfile
index 89af9b6be..8d036d478 100644
--- a/cluster/ringmaster/lz-Dockerfile
+++ b/cluster/ringmaster/lz-Dockerfile
@@ -23,7 +23,7 @@ RUN cp /app/build/leelaz /leelaz
 WORKDIR /
 RUN virtualenv -p /usr/bin/python2 mg_venv
 RUN . mg_venv/bin/activate \
-  && pip install gomill
+  && pip install --no-cache-dir gomill
 # ringmaster now available at /mg_venv/bin/ringmaster
 
 
diff --git a/cluster/selfplay/Dockerfile-py b/cluster/selfplay/Dockerfile-py
index ecb9a4427..4a7a6b65b 100644
--- a/cluster/selfplay/Dockerfile-py
+++ b/cluster/selfplay/Dockerfile-py
@@ -56,15 +56,15 @@ RUN git clone https://github.com/tensorflow/tensorflow.git && \
 WORKDIR /tensorflow
 
 ENV CI_BUILD_PYTHON python3
-RUN pip3 install --upgrade pip setuptools
-RUN pip3 install keras
+RUN pip3 install --no-cache-dir --upgrade pip setuptools
+RUN pip3 install --no-cache-dir keras
 
 # --cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0" \
 RUN ln -s /usr/bin/python3 /usr/bin/python  && tensorflow/tools/ci_build/builds/configured CPU \
     bazel build -c opt --copt=-march="haswell" \
         tensorflow/tools/pip_package:build_pip_package --verbose_failures && \
     bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/pip && \
-    pip3 --no-cache-dir install --upgrade /tmp/pip/tensorflow-*.whl && \
+    pip3 install --no-cache-dir --upgrade /tmp/pip/tensorflow-*.whl && \
     rm -rf /tmp/pip && \
     rm -rf /root/.cache
 
@@ -72,7 +72,7 @@ WORKDIR /
 
 ADD staging/requirements.txt /app/requirements.txt
 
-RUN pip3 install -r /app/requirements.txt
+RUN pip3 install --no-cache-dir -r /app/requirements.txt
 
 ADD staging/ /app
 
diff --git a/cluster/selfplay/Dockerfile-tpu b/cluster/selfplay/Dockerfile-tpu
index 8ce11c3cb..b61b8b459 100644
--- a/cluster/selfplay/Dockerfile-tpu
+++ b/cluster/selfplay/Dockerfile-tpu
@@ -2,7 +2,7 @@ ARG PROJECT
 FROM gcr.io/$PROJECT/cc-base:latest
 
 # Tensorflow is needed for gfile
-RUN pip3 install tensorflow==1.15.0
+RUN pip3 install --no-cache-dir tensorflow==1.15.0
 WORKDIR /app
 
 ARG RUNMODE
diff --git a/cluster/trainer/Dockerfile b/cluster/trainer/Dockerfile
index ee8a4ff92..df701fc53 100644
--- a/cluster/trainer/Dockerfile
+++ b/cluster/trainer/Dockerfile
@@ -1,7 +1,7 @@
 ARG PROJECT
 FROM gcr.io/$PROJECT/cc-base:latest
 
-RUN pip3 install tensorflow==1.15.0
+RUN pip3 install --no-cache-dir tensorflow==1.15.0
 WORKDIR /app
 
 ENV BOARD_SIZE="19"
diff --git a/minigui/README.md b/minigui/README.md
index 2d8c2d26b..f17a5e162 100644
--- a/minigui/README.md
+++ b/minigui/README.md
@@ -40,12 +40,12 @@ reduced performance. Currently, Minigui's study mode requires the C++ engine.
 
 ## Advanced Instructions
 
-1. Install the minigo python requirements: `pip install -r requirements.txt` (or
+1. Install the minigo python requirements: `pip install --no-cache-dir -r requirements.txt` (or
    `pip3 ...` depending how you've set things up).
 
-1. Install TensorFlow (here, we use the CPU install): `pip install "tensorflow>=1.7,<1.8"`
+1. Install TensorFlow (here, we use the CPU install): `pip install --no-cache-dir "tensorflow>=1.7,<1.8"`
 
-1. Install the **minigui** python requirements: `pip install -r minigui/requirements.txt`
+1. Install the **minigui** python requirements: `pip install --no-cache-dir -r minigui/requirements.txt`
 
 1. Install the [Google Cloud SDK](https://cloud.google.com/sdk/downloads)
 
diff --git a/minigui/edgetpu/install_requirements.sh b/minigui/edgetpu/install_requirements.sh
index 7724d3ad8..baf2381b8 100644
--- a/minigui/edgetpu/install_requirements.sh
+++ b/minigui/edgetpu/install_requirements.sh
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-python3 -m pip install -r minigui/requirements.txt
+python3 -m pip install --no-cache-dir -r minigui/requirements.txt
 
 # For Raspberry Pi
 if grep -q "Raspberry Pi" /sys/firmware/devicetree/base/model; then
diff --git a/ml_perf/README.md b/ml_perf/README.md
index 44cf911b9..cf2a52fb2 100644
--- a/ml_perf/README.md
+++ b/ml_perf/README.md
@@ -25,17 +25,17 @@ cards.
     cd minigo
 
     # Create a virtualenv (this step is optional but highly recommended).
-    pip3 install virtualenv
-    pip3 install virtualenvwrapper
+    pip3 install --no-cache-dir virtualenv
+    pip3 install --no-cache-dir virtualenvwrapper
     virtualenv -p /usr/bin/python3 --system-site-packages $HOME/.venvs/minigo
     source $HOME/.venvs/minigo/bin/activate
 
     # Install Python dependencies
-    pip3 install -r requirements.txt
+    pip3 install --no-cache-dir -r requirements.txt
 
     # Install Python Tensorflow for GPU
     # (alternatively use "tensorflow==1.15.0" for CPU Tensorflow)
-    pip3 install "tensorflow-gpu==1.15.0"
+    pip3 install --no-cache-dir "tensorflow-gpu==1.15.0"
 
     # Install bazel
     BAZEL_VERSION=0.24.1
diff --git a/testing/Dockerfile.v2 b/testing/Dockerfile.v2
index 8093127ca..63c3c5b25 100644
--- a/testing/Dockerfile.v2
+++ b/testing/Dockerfile.v2
@@ -40,9 +40,9 @@ WORKDIR /workspace
 COPY bootstrap_v2.sh /workspace/bootstrap_v2.sh
 
 COPY staging/requirements.txt /workspace/requirements.txt
-RUN pip3 install --upgrade pip setuptools
-RUN pip3 install -r /workspace/requirements.txt
-RUN pip3 install "tensorflow==1.15.0"
+RUN pip3 install --no-cache-dir --upgrade pip setuptools
+RUN pip3 install --no-cache-dir -r /workspace/requirements.txt
+RUN pip3 install --no-cache-dir "tensorflow==1.15.0"
 
 # Workaround because test-infra/scenarios/execute.py runs `/usr/bin/env python`
 # and not python2.7 explicitly.