From ce509c79c51cb05447df015b74139c6a052b40fe Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Tue, 14 Mar 2023 14:16:52 +0100
Subject: [PATCH 01/12] feat: dockerize llamacpp

---
 .dockerignore | 24 ++++++++++++++++++++++++
 Dockerfile    | 17 +++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000000000..952990f268900
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,24 @@
+*.o
+*.a
+.cache/
+.vs/
+.vscode/
+.DS_Store
+
+build/
+build-em/
+build-debug/
+build-release/
+build-static/
+build-no-accel/
+build-sanitize-addr/
+build-sanitize-thread/
+
+models/*
+
+/main
+/quantize
+
+arm_neon.h
+compile_commands.json
+Dockerfile
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000..6b16aa06c9e91
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION
+
+RUN apt-get update && \
+    apt-get install -y build-essential python3 python3-pip
+
+RUN pip install --upgrade pip setuptools wheel \
+    && pip install torch torchvision torchaudio sentencepiece numpy tqdm requests
+
+WORKDIR /app
+
+COPY . .
+
+RUN make
+
+ENTRYPOINT [ "/app/main" ]
\ No newline at end of file

From 6d9ad1058b3448ed98bb7576e6ecead366987ba6 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Tue, 14 Mar 2023 15:03:25 +0100
Subject: [PATCH 02/12] feat: split build & runtime stages

---
 Dockerfile | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 6b16aa06c9e91..a0f87de9b4ec6 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 ARG UBUNTU_VERSION=22.04
 
-FROM ubuntu:$UBUNTU_VERSION
+FROM ubuntu:$UBUNTU_VERSION as build
 
 RUN apt-get update && \
     apt-get install -y build-essential python3 python3-pip
@@ -14,4 +14,8 @@ COPY . .
 
 RUN make
 
-ENTRYPOINT [ "/app/main" ]
\ No newline at end of file
+FROM ubuntu:$UBUNTU_VERSION as runtime
+
+COPY --from=build /app/main /main
+
+ENTRYPOINT [ "/main" ]
\ No newline at end of file

From 9959b1f374e19d38a649d77792cb1310862802c1 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Tue, 14 Mar 2023 15:46:03 +0100
Subject: [PATCH 03/12] split dockerfile into main & tools

---
 .devops/main.Dockerfile                | 18 ++++++++++++++++++
 Dockerfile => .devops/tools.Dockerfile | 10 ++--------
 2 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 .devops/main.Dockerfile
 rename Dockerfile => .devops/tools.Dockerfile (70%)

diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile
new file mode 100644
index 0000000000000..cd575efa013d1
--- /dev/null
+++ b/.devops/main.Dockerfile
@@ -0,0 +1,18 @@
+ARG UBUNTU_VERSION=22.04
+
+FROM ubuntu:$UBUNTU_VERSION as build
+
+RUN apt-get update && \
+    apt-get install -y build-essential
+
+WORKDIR /app
+
+COPY . .
+
+RUN make
+
+FROM ubuntu:$UBUNTU_VERSION as runtime
+
+COPY --from=build /app/main /main
+
+ENTRYPOINT [ "/main" ]
\ No newline at end of file
diff --git a/Dockerfile b/.devops/tools.Dockerfile
similarity index 70%
rename from Dockerfile
rename to .devops/tools.Dockerfile
index a0f87de9b4ec6..1cb2123cc1ac3 100644
--- a/Dockerfile
+++ b/.devops/tools.Dockerfile
@@ -10,12 +10,6 @@ RUN pip install --upgrade pip setuptools wheel \
 
 WORKDIR /app
 
-COPY . .
+COPY ./convert-pth-to-ggml.py .
 
-RUN make
-
-FROM ubuntu:$UBUNTU_VERSION as runtime
-
-COPY --from=build /app/main /main
-
-ENTRYPOINT [ "/main" ]
\ No newline at end of file
+ENTRYPOINT ["python3", "convert-pth-to-ggml.py"]
\ No newline at end of file

From a4590d3be93746bbf75326e2de843be7379020c3 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Tue, 14 Mar 2023 18:04:21 +0100
Subject: [PATCH 04/12] add quantize into tool docker image

---
 .devops/tools.Dockerfile |  6 ++++--
 .devops/tools.sh         | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100755 .devops/tools.sh

diff --git a/.devops/tools.Dockerfile b/.devops/tools.Dockerfile
index 1cb2123cc1ac3..618cdddc4973f 100644
--- a/.devops/tools.Dockerfile
+++ b/.devops/tools.Dockerfile
@@ -10,6 +10,8 @@ RUN pip install --upgrade pip setuptools wheel \
 
 WORKDIR /app
 
-COPY ./convert-pth-to-ggml.py .
+COPY . .
 
-ENTRYPOINT ["python3", "convert-pth-to-ggml.py"]
\ No newline at end of file
+RUN make
+
+ENTRYPOINT ["/app/.devops/tools.sh"]
\ No newline at end of file
diff --git a/.devops/tools.sh b/.devops/tools.sh
new file mode 100755
index 0000000000000..e88f0bca62936
--- /dev/null
+++ b/.devops/tools.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Read the first argument into a variable
+arg1="$1"
+
+# Shift the arguments to remove the first one
+shift
+
+# Join the remaining arguments into a single string
+arg2="$@"
+
+if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
+  python3 ./convert-pth-to-ggml.py $arg2
+elif  [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
+  /app/quantize $arg2
+else
+  echo "Unknown command: $arg1"
+  echo "Valid commands: --convert (-c) or --quantize (-q)"
+fi
\ No newline at end of file

From 901c34d1b180271e6b49818adda59b4578bcbb34 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <hounter.caza@gmail.com>
Date: Tue, 14 Mar 2023 20:46:41 +0100
Subject: [PATCH 05/12] Update .devops/tools.sh

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 .devops/tools.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.devops/tools.sh b/.devops/tools.sh
index e88f0bca62936..ca32e45030255 100755
--- a/.devops/tools.sh
+++ b/.devops/tools.sh
@@ -10,10 +10,10 @@ shift
 arg2="$@"
 
 if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
-  python3 ./convert-pth-to-ggml.py $arg2
+    python3 ./convert-pth-to-ggml.py $arg2
 elif  [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
-  /app/quantize $arg2
+    /app/quantize $arg2
 else
-  echo "Unknown command: $arg1"
-  echo "Valid commands: --convert (-c) or --quantize (-q)"
+    echo "Unknown command: $arg1"
+    echo "Valid commands: --convert (-c) or --quantize (-q)"
 fi
\ No newline at end of file

From 44f74671cbe1b1d2d33159682441f175546aa351 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Wed, 15 Mar 2023 17:45:33 +0100
Subject: [PATCH 06/12] add docker action pipeline

---
 .devops/{tools.Dockerfile => full.Dockerfile} |  0
 .devops/tools.sh                              | 11 ++--
 .github/workflows/docker.yml                  | 50 +++++++++++++++++++
 3 files changed, 58 insertions(+), 3 deletions(-)
 rename .devops/{tools.Dockerfile => full.Dockerfile} (100%)
 create mode 100644 .github/workflows/docker.yml

diff --git a/.devops/tools.Dockerfile b/.devops/full.Dockerfile
similarity index 100%
rename from .devops/tools.Dockerfile
rename to .devops/full.Dockerfile
diff --git a/.devops/tools.sh b/.devops/tools.sh
index ca32e45030255..35d22992e6db1 100755
--- a/.devops/tools.sh
+++ b/.devops/tools.sh
@@ -11,9 +11,14 @@ arg2="$@"
 
 if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
     python3 ./convert-pth-to-ggml.py $arg2
-elif  [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
+elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
     /app/quantize $arg2
+elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
+    /app/main $arg2
 else
     echo "Unknown command: $arg1"
-    echo "Valid commands: --convert (-c) or --quantize (-q)"
-fi
\ No newline at end of file
+    echo "Available commands: "
+    echo "  --run (-r)"
+    echo "  --convert (-c)"
+    echo "  --quantize (-q)"
+fi
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000000000..4e20453a29c8e
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,50 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# GitHub recommends pinning actions to a commit SHA.
+# To get a newer version, you will need to update the SHA.
+# You can also reference a tag or branch, but the action may change without warning.
+
+name: Publish Docker image
+
+on:
+  push:
+    branches:
+      - master
+
+jobs:
+  push_to_registry:
+    name: Push Docker image to Docker Hub
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        config:
+          - { tag: "latest", dockerfile: ".devops/main.Dockerfile" }
+          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
+
+    steps:
+      - name: Check out the repo
+        uses: actions/checkout@v3
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Build and push Docker image
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: true
+          tags: "bernatvadell/llamacpp:${{ matrix.config.tag }}"
+          file: ${{ matrix.config.dockerfile }}

From 3bcfc2b83de8395b7b8c6d19b1e2890557898a50 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Thu, 16 Mar 2023 11:31:49 +0100
Subject: [PATCH 07/12] change CI to publish at github docker registry

---
 .github/workflows/docker.yml | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 4e20453a29c8e..eae88a6e332e4 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -38,13 +38,14 @@ jobs:
       - name: Log in to Docker Hub
         uses: docker/login-action@v2
         with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          registry: docker.pkg.github.com
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}  
 
       - name: Build and push Docker image
         uses: docker/build-push-action@v4
         with:
           context: .
           push: true
-          tags: "bernatvadell/llamacpp:${{ matrix.config.tag }}"
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
           file: ${{ matrix.config.dockerfile }}

From c20281924c70efc3f6b2b4ebd7e658efca4525a1 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Thu, 16 Mar 2023 11:43:49 +0100
Subject: [PATCH 08/12] fix name runs-on macOS-latest is macos-latest
 (lowercase)

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1a068ae75f966..94f199cb8e04a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,7 +19,7 @@ jobs:
           make
 
   macOS-latest:
-    runs-on: macOS-latest
+    runs-on: macos-latest
 
     steps:
       - name: Clone

From c6b2c6fd8f909c1c5ebcab4aa40bb9d17c433bc3 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Thu, 16 Mar 2023 11:44:00 +0100
Subject: [PATCH 09/12] include docker versioned images

---
 .github/workflows/docker.yml | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index eae88a6e332e4..b1c7098f6b9e4 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -10,6 +10,7 @@
 name: Publish Docker image
 
 on:
+  pull_request:
   push:
     branches:
       - master
@@ -18,13 +19,23 @@ jobs:
   push_to_registry:
     name: Push Docker image to Docker Hub
     runs-on: ubuntu-latest
-
+    env:
+      COMMIT_SHA: ${{ github.sha }}
     strategy:
       matrix:
         config:
           - { tag: "latest", dockerfile: ".devops/main.Dockerfile" }
           - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
-
+          - {
+              tag: "light",
+              dockerfile: ".devops/main.Dockerfile",
+              include_hash: true,
+            }
+          - {
+              tag: "full",
+              dockerfile: ".devops/full.Dockerfile",
+              include_hash: true,
+            }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v3
@@ -40,12 +51,12 @@ jobs:
         with:
           registry: docker.pkg.github.com
           username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}  
+          password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Build and push Docker image
+      - name: Build and push Docker image (tagged)
         uses: docker/build-push-action@v4
         with:
           context: .
           push: true
-          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
+          tags: "ghcr.io/ggerganov/llama.cpp:{{ matrix.config.tag }}{{ matrix.config.include_hash && '-{{ env.COMMIT_SHA }}' || '' }}"
           file: ${{ matrix.config.dockerfile }}

From 4941df7f3bcb600c814d3287197ad807ac3c3576 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Thu, 16 Mar 2023 12:00:05 +0100
Subject: [PATCH 10/12] fix github action docker

---
 .github/workflows/docker.yml | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b1c7098f6b9e4..5dd07221161ce 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -24,18 +24,8 @@ jobs:
     strategy:
       matrix:
         config:
-          - { tag: "latest", dockerfile: ".devops/main.Dockerfile" }
+          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
           - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
-          - {
-              tag: "light",
-              dockerfile: ".devops/main.Dockerfile",
-              include_hash: true,
-            }
-          - {
-              tag: "full",
-              dockerfile: ".devops/full.Dockerfile",
-              include_hash: true,
-            }
     steps:
       - name: Check out the repo
         uses: actions/checkout@v3
@@ -49,14 +39,23 @@ jobs:
       - name: Log in to Docker Hub
         uses: docker/login-action@v2
         with:
-          registry: docker.pkg.github.com
+          registry: ghcr.io
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Build and push Docker image (tagged)
+      - name: Build and push Docker image (versioned)
+        if: github.event_name == 'push'
         uses: docker/build-push-action@v4
         with:
           context: .
           push: true
-          tags: "ghcr.io/ggerganov/llama.cpp:{{ matrix.config.tag }}{{ matrix.config.include_hash && '-{{ env.COMMIT_SHA }}' || '' }}"
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
           file: ${{ matrix.config.dockerfile }}
+
+      - name: Build and push Docker image (tagged)
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          push: github.event_name == 'push'
+          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
+          file: ${{ matrix.config.dockerfile }}
\ No newline at end of file

From 0bc1e806d164b6d5102040f7ce552016379bd8a5 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Thu, 16 Mar 2023 12:02:22 +0100
Subject: [PATCH 11/12] fix docker.yml

---
 .github/workflows/docker.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 5dd07221161ce..bc9aff7b7ffae 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -56,6 +56,6 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           context: .
-          push: github.event_name == 'push'
+          push: ${{ github.event_name == 'push' }}
           tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
           file: ${{ matrix.config.dockerfile }}
\ No newline at end of file

From 79a48d9876436acbe28b18469ba408d6250c51a7 Mon Sep 17 00:00:00 2001
From: Bernat Vadell <bvadell@feimsoft.com>
Date: Fri, 17 Mar 2023 10:31:53 +0100
Subject: [PATCH 12/12] feat: include all-in-one command tool & update
 readme.md

---
 .devops/tools.sh       | 32 ++++++++++++++++----
 README.md              | 32 ++++++++++++++++++++
 convert-pth-to-ggml.py |  6 +++-
 download-pth.py        | 66 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 130 insertions(+), 6 deletions(-)
 create mode 100644 download-pth.py

diff --git a/.devops/tools.sh b/.devops/tools.sh
index 35d22992e6db1..b5711c94e1832 100755
--- a/.devops/tools.sh
+++ b/.devops/tools.sh
@@ -1,7 +1,12 @@
 #!/bin/bash
+set -e
 
 # Read the first argument into a variable
 arg1="$1"
 
 # Shift the arguments to remove the first one
-shift
+# (guarded: with `set -e`, a bare `shift` on an empty argument list fails
+# and would abort the script before the usage text is printed)
+if [[ $# -gt 0 ]]; then
+    shift
+fi
@@ -12,13 +17,42 @@ arg2="$@"
 if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
-    python3 ./convert-pth-to-ggml.py $arg2
+    # Forward "$@" rather than the flattened $arg2 so that arguments containing
+    # spaces (e.g. a quoted prompt) reach the tool as single words.
+    python3 ./convert-pth-to-ggml.py "$@"
 elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
-    /app/quantize $arg2
+    ./quantize "$@"
 elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
-    /app/main $arg2
+    ./main "$@"
+elif [[ $arg1 == '--download' || $arg1 == '-d' ]]; then
+    python3 ./download-pth.py "$@"
+elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
+    # After the shift above: $1 is the models directory, $2 the model size.
+    echo "Downloading model..."
+    python3 ./download-pth.py "$1" "$2"
+    echo "Converting PTH to GGML..."
+    python3 ./convert-pth-to-ggml.py "$1/$2" 1
+    echo "Quantizing model..."
+    # Glob directly instead of parsing `ls` output so paths with spaces work.
+    for i in "$1/$2"/ggml-model-f16.bin*; do
+        # An unmatched glob stays literal; skip it rather than quantize a missing file.
+        [ -e "$i" ] || continue
+        if [ -f "${i/f16/q4_0}" ]; then
+            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+        else
+            echo "Quantizing $i into ${i/f16/q4_0}..."
+            ./quantize "$i" "${i/f16/q4_0}" 2
+        fi
+    done
 else
     echo "Unknown command: $arg1"
     echo "Available commands: "
-    echo "  --run (-r)"
-    echo "  --convert (-c)"
-    echo "  --quantize (-q)"
+    echo "  --run (-r): Run a model previously converted into ggml"
+    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512"
+    echo "  --convert (-c): Convert a llama model into ggml"
+    echo "              ex: \"/models/7B/\" 1"
+    echo "  --quantize (-q): Optimize with quantization process ggml"
+    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+    echo "  --download (-d): Download original llama model from CDN: https://agi.gpt4.org/llama/"
+    echo "              ex: \"/models/\" 7B"
+    echo "  --all-in-one (-a): Execute --download, --convert & --quantize"
+    echo "              ex: \"/models/\" 7B"
 fi
diff --git a/README.md b/README.md
index 15e1b9a2dd5ed..8cf59f4188c11 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ Supported platforms:
 - [X] Mac OS
 - [X] Linux
 - [X] Windows (via CMake)
+- [X] Docker
 
 ---
 
@@ -194,6 +195,37 @@ Finally, copy the `llama` binary and the model files to your device storage. Her
 
 https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4
 
+### Docker
+
+#### Prerequisites
+* Docker must be installed and running on your system.
+* Create a folder to store big models & intermediate files (the examples below use /llama/models)
+
+#### Images
+We have two Docker images available for this project:
+
+1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and quantize them to 4 bits.
+2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file.
+
+#### Usage
+
+The easiest way to download the models, convert them to ggml and optimize them is with the `--all-in-one` command, which is included in the full Docker image.
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
+```
+
+Once it completes, you are ready to play!
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
+```
+
+or with light image:
+
+```bash
+docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
+```
 
 ## Limitations
 
diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 5c36e9c09dc0d..d0eb213c8666e 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -16,7 +16,7 @@
 # At the start of the ggml file we write the model parameters
 # and vocabulary.
 #
-
+import os
 import sys
 import json
 import struct
@@ -64,6 +64,10 @@ def get_n_parts(dim):
         sys.exit(1)
     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"
 
+if os.path.exists(fname_out):
+    print(f"Skip conversion, it already exists: {fname_out}")
+    sys.exit(0)
+
 with open(fname_hparams, "r") as f:
     hparams = json.load(f)
 
diff --git a/download-pth.py b/download-pth.py
new file mode 100644
index 0000000000000..129532c0c6b40
--- /dev/null
+++ b/download-pth.py
@@ -0,0 +1,83 @@
+"""Download the original LLaMA checkpoint and tokenizer files.
+
+Usage: download-pth.py dir-model model-type
+
+Checkpoint files are stored in <dir-model>/<model-type>/ and the tokenizer
+files in <dir-model>/. Files that already exist are not downloaded again.
+"""
+import os
+import sys
+from tqdm import tqdm
+import requests
+
+BASE_URL = "https://agi.gpt4.org/llama/LLaMA"
+
+# Number of consolidated.NN.pth shards for each model size.
+NUM_PARTS = {
+    "7B": 1,
+    "13B": 2,
+    "30B": 4,
+    "65B": 8,
+}
+
+CHUNK_SIZE = 1024 * 1024  # bytes per streamed chunk
+TIMEOUT = 60  # seconds to wait for the server to connect or send data
+
+
+def download_file(url, dest_path):
+    """Stream *url* into *dest_path* unless that file already exists.
+
+    Raises requests.HTTPError when the server answers with an error status.
+    """
+    name = os.path.basename(dest_path)
+    if os.path.exists(dest_path):
+        print(f"Skip file download, it already exists: {name}")
+        return
+
+    # Download under a temporary name and rename at the end, so an interrupted
+    # transfer is never mistaken for a complete file on the next run.
+    tmp_path = dest_path + ".part"
+    with requests.get(url, stream=True, timeout=TIMEOUT) as response:
+        # Fail loudly instead of saving an HTML error page as model data.
+        response.raise_for_status()
+        total = int(response.headers.get("content-length", 0)) or None
+        with open(tmp_path, "wb") as f:
+            with tqdm(total=total, unit="B", unit_scale=True, miniters=1, desc=name) as t:
+                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                    if chunk:
+                        f.write(chunk)
+                        t.update(len(chunk))
+    os.replace(tmp_path, dest_path)
+
+
+def main():
+    """Parse the command line and download every file of the requested model."""
+    if len(sys.argv) < 3:
+        print("Usage: download-pth.py dir-model model-type\n")
+        print("  model-type: Available models 7B, 13B, 30B or 65B")
+        sys.exit(1)
+
+    models_dir = sys.argv[1]
+    model = sys.argv[2]
+
+    if model not in NUM_PARTS:
+        print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B")
+        sys.exit(1)
+
+    print(f"Downloading model {model}")
+
+    model_dir = os.path.abspath(os.path.join(models_dir, model))
+    os.makedirs(model_dir, exist_ok=True)
+
+    model_files = ["checklist.chk", "params.json"]
+    model_files += [f"consolidated.{i:02d}.pth" for i in range(NUM_PARTS[model])]
+    for name in model_files:
+        download_file(f"{BASE_URL}/{model}/{name}", os.path.join(model_dir, name))
+
+    # The tokenizer is shared by all model sizes and lives next to the model folders.
+    for name in ("tokenizer_checklist.chk", "tokenizer.model"):
+        download_file(f"{BASE_URL}/{name}", os.path.join(models_dir, name))
+
+
+if __name__ == "__main__":
+    main()