Skip to content

Commit aee35a5

Browse files
authored
Merge branch 'main' into patch-44
2 parents f44fad1 + 083fdaf commit aee35a5

File tree

16 files changed

+333
-180
lines changed

16 files changed

+333
-180
lines changed

.ci/scripts/check_gibberish

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@ else
2424
fi
2525
fi
2626

27+
#######################################################################
28+
#
29+
# check whether the aspell spell checker is available
30+
31+
if command -v aspell &> /dev/null; then
32+
echo "Checking $TMPFILE for gibberish"
33+
else
34+
echo "Aspell is not installed or not in PATH."
35+
echo "Gibberish unchecked in $TMPFILE"
36+
exit 0
37+
fi
38+
2739
#######################################################################
2840
#
2941
# run spell check on the extracted sequence

.ci/scripts/run-docs

Lines changed: 62 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -1,144 +1,67 @@
1-
# /bin/bash -x
1+
#!/bin/bash -x
22

3-
if [ "X$1" == "X" ]; then
3+
# Check if an argument was provided
4+
if [ -z "$1" ]; then
45
echo "Must specify document to run"
56
exit 1
67
fi
78

8-
if [ "$1" == "readme" ]; then
9-
echo "::group::Create script to run README"
10-
python3 torchchat/utils/scripts/updown.py --create-sections --file README.md --replace 'llama3.1:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh
11-
# for good measure, if something happened to updown processor,
12-
# and it did not error out, fail with an exit 1
13-
echo "exit 1" >> ./run-readme.sh
14-
echo "::endgroup::"
15-
16-
echo "::group::Run README"
17-
echo "*******************************************"
18-
cat ./run-readme.sh
19-
echo "*******************************************"
20-
bash -x ./run-readme.sh
21-
echo "::endgroup::"
22-
23-
exit 0
24-
fi
25-
26-
if [ "$1" == "quantization" ]; then
27-
echo "::group::Create script to run quantization"
28-
python3 torchchat/utils/scripts/updown.py --create-sections --file docs/quantization.md --replace llama3:stories15M --suppress huggingface-cli,HF_TOKEN > ./run-quantization.sh
29-
# for good measure, if something happened to updown processor,
30-
# and it did not error out, fail with an exit 1
31-
echo "exit 1" >> ./run-quantization.sh
32-
echo "::endgroup::"
33-
34-
echo "::group::Run quantization"
35-
echo "*******************************************"
36-
cat ./run-quantization.sh
37-
echo "*******************************************"
38-
bash -x ./run-quantization.sh
39-
echo "::endgroup::"
40-
41-
exit 0
42-
fi
43-
44-
if [ "$1" == "gguf" ]; then
45-
echo "::group::Create script to run gguf"
46-
python3 torchchat/utils/scripts/updown.py --file docs/GGUF.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-gguf.sh
47-
# for good measure, if something happened to updown processor,
48-
# and it did not error out, fail with an exit 1
49-
echo "exit 1" >> ./run-gguf.sh
50-
echo "::endgroup::"
51-
52-
echo "::group::Run gguf"
53-
echo "*******************************************"
54-
cat ./run-gguf.sh
55-
echo "*******************************************"
56-
bash -x ./run-gguf.sh
57-
echo "::endgroup::"
58-
fi
59-
60-
61-
if [ "$1" == "advanced" ]; then
62-
echo "::group::Create script to run advanced"
63-
python3 torchchat/utils/scripts/updown.py --file docs/ADVANCED-USERS.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-advanced.sh
64-
# for good measure, if something happened to updown processor,
65-
# and it did not error out, fail with an exit 1
66-
echo "exit 1" >> ./run-advanced.sh
67-
echo "::endgroup::"
68-
69-
echo "::group::Run advanced"
70-
echo "*******************************************"
71-
cat ./run-advanced.sh
72-
echo "*******************************************"
73-
bash -x ./run-advanced.sh
74-
echo "::endgroup::"
75-
fi
76-
77-
if [ "$1" == "evaluation" ]; then
78-
echo "::group::Create script to run evaluation"
79-
python3 torchchat/utils/scripts/updown.py --file torchchat/utils/docs/evaluation.md --replace 'llama3:stories15M,-l 3:-l 2' --suppress huggingface-cli,HF_TOKEN > ./run-evaluation.sh
80-
# for good measure, if something happened to updown processor,
81-
# and it did not error out, fail with an exit 1
82-
echo "exit 1" >> ./run-evaluation.sh
83-
echo "::endgroup::"
84-
85-
echo "::group::Run evaluation"
86-
echo "*******************************************"
87-
cat ./run-evaluation.sh
88-
echo "*******************************************"
89-
bash -x ./run-evaluation.sh
90-
fi
91-
92-
if [ "$1" == "multimodal" ]; then
93-
94-
# Expecting that this might fail this test as-is, because
95-
# it's the first on-pr test depending on github secrets for access with HF token access
96-
97-
echo "::group::Create script to run multimodal"
98-
python3 torchchat/utils/scripts/updown.py --file docs/multimodal.md > ./run-multimodal.sh
99-
# for good measure, if something happened to updown processor,
100-
# and it did not error out, fail with an exit 1
101-
echo "exit 1" >> ./run-multimodal.sh
102-
echo "::endgroup::"
103-
104-
echo "::group::Run multimodal"
105-
echo "*******************************************"
106-
cat ./run-multimodal.sh
107-
echo "*******************************************"
108-
bash -x ./run-multimodal.sh
109-
echo "::endgroup::"
110-
fi
111-
112-
if [ "$1" == "native" ]; then
113-
114-
echo "::group::Create script to run native-execution"
115-
python3 torchchat/utils/scripts/updown.py --file docs/native-execution.md > ./run-native.sh
116-
# for good measure, if something happened to updown processor,
117-
# and it did not error out, fail with an exit 1
118-
echo "exit 1" >> ./run-native.sh
119-
echo "::endgroup::"
120-
121-
echo "::group::Run native-execution"
122-
echo "*******************************************"
123-
cat ./run-native.sh
124-
echo "*******************************************"
125-
bash -x ./run-native.sh
126-
echo "::endgroup::"
127-
fi
128-
129-
if [ "$1" == "distributed" ]; then
130-
131-
echo "::group::Create script to run distributed"
132-
python3 torchchat/utils/scripts/updown.py --file docs/distributed.md > ./run-distributed.sh
133-
# for good measure, if something happened to updown processor,
134-
# and it did not error out, fail with an exit 1
135-
echo "exit 1" >> ./run-distributed.sh
136-
echo "::endgroup::"
137-
138-
echo "::group::Run distributed"
139-
echo "*******************************************"
140-
cat ./run-distributed.sh
141-
echo "*******************************************"
142-
bash -x ./run-distributed.sh
143-
echo "::endgroup::"
144-
fi
9+
# Pre-initialize variables
10+
filepath=""
11+
parameters="--replace 'llama3:stories15M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN"
12+
script_name="./run-${1}.sh" # Dynamically initialize script name
13+
14+
# Use a case statement to handle the $1 argument
15+
case "$1" in
16+
"readme")
17+
filepath="README.md"
18+
;;
19+
"quantization")
20+
filepath="docs/quantization.md"
21+
;;
22+
"gguf")
23+
filepath="docs/GGUF.md"
24+
;;
25+
"advanced")
26+
filepath="docs/ADVANCED-USERS.md"
27+
;;
28+
"evaluation")
29+
filepath="torchchat/utils/docs/evaluation.md"
30+
;;
31+
"multimodal")
32+
filepath="docs/multimodal.md"
33+
parameters="" # Clear parameters
34+
;;
35+
"native")
36+
filepath="docs/native-execution.md"
37+
parameters="" # Clear parameters
38+
;;
39+
"distributed")
40+
filepath="docs/distributed.md"
41+
parameters="--replace 'llama3.1:stories110M,-l3:-l2' --suppress huggingface-cli,HF_TOKEN" # Use stories110M to avoid need for authentication
42+
;;
43+
"local")
44+
filepath="docs/local-model.md"
45+
parameters="" # Clear parameters
46+
;;
47+
48+
*)
49+
echo "Unknown option: $1"
50+
exit 1
51+
;;
52+
esac
53+
54+
# Generate the script
55+
echo "::group::Create script to run $1"
56+
python3 torchchat/utils/scripts/updown.py --file "$filepath" $parameters > "$script_name"
57+
# if something happened to updown processor, and it did not error out, fail with an exit 1
58+
echo "exit 1" >> "$script_name"
59+
echo "::endgroup::"
60+
61+
# Run the script
62+
echo "::group::Run $1"
63+
echo "*******************************************"
64+
cat "$script_name"
65+
echo "*******************************************"
66+
bash -x "$script_name"
67+
echo "::endgroup::"

.github/workflows/more-tests.yml

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,10 @@ jobs:
4040
echo "::endgroup::"
4141
4242
echo "::group::Run inference"
43-
export MODEL_PATH=checkpoints/stories15M/stories15M.pt
43+
export MODEL_DIR=checkpoints/stories15M/
44+
export MODEL_PATH=${MODEL_DIR}/stories15M.pt
4445
export MODEL_NAME=stories15M
45-
export MODEL_DIR=/tmp
46+
4647
4748
for DTYPE in bfloat16 float16 float32; do
4849
###################################################################
@@ -145,3 +146,65 @@ jobs:
145146
echo "tests complete"
146147
echo "******************************************"
147148
echo "::endgroup::"
149+
150+
test-sdpa-backends-export:
151+
permissions:
152+
id-token: write
153+
contents: read
154+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
155+
with:
156+
runner: linux.g5.4xlarge.nvidia.gpu
157+
gpu-arch-type: cuda
158+
gpu-arch-version: "12.4"
159+
timeout: 60
160+
script: |
161+
set -xeou pipefail
162+
echo "::group::Print machine info"
163+
uname -a
164+
echo "::endgroup::"
165+
166+
echo "::group::Download checkpoints"
167+
# Install requirements
168+
./install/install_requirements.sh cuda
169+
pip3 list
170+
python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
171+
echo "::endgroup::"
172+
173+
echo "::group::Download checkpoints"
174+
mkdir -p checkpoints/stories15M
175+
pushd checkpoints/stories15M
176+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
177+
wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
178+
popd
179+
echo "::endgroup::"
180+
181+
echo "::group::Run inference"
182+
export MODEL_DIR=checkpoints/stories15M/
183+
export MODEL_PATH=${MODEL_DIR}/stories15M.pt
184+
export MODEL_NAME=stories15M
185+
186+
./torchchat/utils/scripts/build_native.sh aoti
187+
188+
for DEVICE in cpu cuda; do
189+
# depending on how the parameter passing works, may only be able to do bfloat16 for aoti_run, similar to runner-cuda-dtype.yml
190+
# (although the runner environment should not have an opinion on what we use in the artifact, and we might suitably abstract that)
191+
for DTYPE in bfloat16 float16 float32; do
192+
for SDPA in 'math' 'flash_attention' 'efficient_attention' 'cudnn_attention'; do
193+
echo "***************************************************************"
194+
echo "*** $DEVICE $DTYPE $SDPA"
195+
###################################################################
196+
# Export DSO and run with Python
197+
python torchchat.py export --output-dso dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
198+
python torchchat.py generate --dso-path dso.so --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE} --temperature 0 --prompt "Once upon a time"
199+
###################################################################
200+
# Export AOTI and run with aoti_run
201+
python torchchat.py export --output-aoti /tmp/model.pt2 --checkpoint-path ${MODEL_PATH} --attention-backend ${SDPA} --device ${DEVICE} --dtype ${DTYPE}
202+
./cmake-out/aoti_run /tmp/model.pt2 -z ${MODEL_DIR}/tokenizer.model -i "Once upon a time"
203+
###################################################################
204+
done
205+
done
206+
done
207+
208+
echo "tests complete"
209+
echo "******************************************"
210+
echo "::endgroup::"

.github/workflows/run-readme-pr-mps.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ jobs:
1515
conda create -y -n test-readme-mps-macos python=3.10.11 llvm-openmp
1616
conda activate test-readme-mps-macos
1717
set -x
18-
# NS: Remove previous installation of torch first
19-
# as this script does not isntall anything into conda env but rather as system dep
18+
# NS: Remove previous installation of torch first
19+
# as this script does not install anything into conda env but rather as system dep
2020
pip3 uninstall -y torch || true
2121
set -eou pipefail
2222
@@ -37,6 +37,7 @@ jobs:
3737
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
3838
with:
3939
runner: macos-m1-14
40+
timeout: 60
4041
script: |
4142
set -x
4243
conda create -y -n test-quantization-mps-macos python=3.10.11

.github/workflows/run-readme-pr.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,25 @@ jobs:
306306
echo "::endgroup::"
307307
308308
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs native
309+
310+
test-distributed-cuda:
311+
permissions:
312+
id-token: write
313+
contents: read
314+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
315+
with:
316+
runner: linux.g5.4xlarge.nvidia.gpu
317+
gpu-arch-type: cuda
318+
gpu-arch-version: "12.4"
319+
timeout: 60
320+
script: |
321+
echo "::group::Print machine info"
322+
uname -a
323+
echo "::endgroup::"
324+
325+
.ci/scripts/run-docs distributed
326+
327+
echo "::group::Completion"
328+
echo "tests complete"
329+
echo "*******************************************"
330+
echo "::endgroup::"

README.md

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
torchchat is a small codebase showcasing the ability to run large language models (LLMs) seamlessly. With torchchat, you can run LLMs using Python, within your own (C/C++) application (desktop or server) and on iOS and Android.
44

55
> [!IMPORTANT]
6-
> Update September 25, 2024: torchchat has multimodal support for **Llama3.2 11B**!!
6+
> Update
7+
>
8+
> **February 3, 2025**: torchchat has support for [**DeepSeek R1 Distill: 8B**](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B)!
9+
>
10+
> **September 25, 2024**: torchchat has multimodal support for **Llama3.2 11B**!
711
>
812
> To try it out, finish the [Installation](#Installation) section below, then hop
913
> over to our [multimodal guide](docs/multimodal.md) to learn more.
@@ -75,6 +79,7 @@ aliases.
7579
| [ibm-granite/granite-3.0-8b-instruct](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct) || Alias to `granite3-8b`.|
7680
| [ibm-granite/granite-3.1-2b-instruct](https://huggingface.co/ibm-granite/granite-3.1-2b-instruct) || Alias to `granite3.1-2b` and `granite3.1`.|
7781
| [ibm-granite/granite-3.1-8b-instruct](https://huggingface.co/ibm-granite/granite-3.1-8b-instruct) || Alias to `granite3.1-8b`.|
82+
| [deepseek-ai/DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) || Alias to `deepseek-r1:8b`.|
7883

7984

8085
## Installation
@@ -413,7 +418,7 @@ torchchat/utils/scripts/build_native.sh et
413418

414419
Execute using the runner
415420
```bash
416-
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -l 3 -i "Once upon a time"
421+
cmake-out/et_run llama3.1.pte -z `python3 torchchat.py where llama3.1`/tokenizer.model -i "Once upon a time"
417422
```
418423

419424
</details>

docs/quantization.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ OMP_NUM_THREADS=6 python3 torchchat.py generate llama3.1 --dso-path llama3_1.so
182182
If you built the AOTI runner with link_torchao_ops as discussed in the setup section, you can also use the C++ runner:
183183

184184
```
185-
OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time,"
185+
OMP_NUM_THREADS=6 ./cmake-out/aoti_run llama3_1.so -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -i "Once upon a time," # -l 3
186186
```
187187

188188
#### ExecuTorch
@@ -193,7 +193,7 @@ python torchchat.py export llama3.1 --device cpu --dtype float32 --quantize '{"e
193193
Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. It will not work with the `python torchchat.py generate` command.
194194

195195
```
196-
./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l 3 -i "Once upon a time,"
196+
./cmake-out/et_run llama3_1.pte -z $HOME/.torchchat/model-cache/meta-llama/Meta-Llama-3.1-8B-Instruct/tokenizer.model -l3 -i "Once upon a time,"
197197
```
198198

199199
## Experimental TorchAO MPS lowbit kernels

0 commit comments

Comments
 (0)