|
 #!/bin/bash
-#
-# Temporary script - will be removed in the future
-#
 
-./main -m ../models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat.txt
+set -e
 
+MODEL="${MODEL:-./models/13B/ggml-model-q8_0.gguf}"
+PROMPT_TEMPLATE=${PROMPT_TEMPLATE:-./prompts/chat.txt}
+USER_NAME="${USER_NAME:-Matt}"
+AI_NAME="${AI_NAME:-LLaMa}"
+
+# Adjust to the number of CPU cores you want to use.
+N_THREAD="${N_THREAD:-8}"
+
+# Number of tokens to predict (made it larger than default because we want a long interaction)
+N_PREDICTS="${N_PREDICTS:-2048}"
+
+# Note: you can also override the generation options by specifying them on the command line:
+# For example, override the context size by doing: ./chatLLaMa --ctx_size 1024
+GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"
+
+DATE_TIME=$(date +%H:%M)
+DATE_YEAR=$(date +%Y)
+
+PROMPT_FILE=$(mktemp -t llamacpp_prompt.XXXXXXX.txt)
+
+sed -e "s/\[\[USER_NAME\]\]/$USER_NAME/g" \
+    -e "s/\[\[AI_NAME\]\]/$AI_NAME/g" \
+    -e "s/\[\[DATE_TIME\]\]/$DATE_TIME/g" \
+    -e "s/\[\[DATE_YEAR\]\]/$DATE_YEAR/g" \
+    "$PROMPT_TEMPLATE" > "$PROMPT_FILE"
+
+# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
+./main $GEN_OPTIONS \
+  --model "$MODEL" \
+  --threads "$N_THREAD" \
+  --n_predict "$N_PREDICTS" \
+  --color --interactive \
+  --file "${PROMPT_FILE}" \
+  --reverse-prompt "${USER_NAME}:" \
+  --in-prefix ' ' \
+  "$@"
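
The sed step only substitutes the [[USER_NAME]], [[AI_NAME]], [[DATE_TIME]] and [[DATE_YEAR]] placeholders; the surrounding dialogue lives in prompts/chat.txt, which is not part of this diff. As a rough, illustrative sketch (only the placeholder names are taken from the script, the wording is made up), a compatible template could look like:

    Text transcript of a dialog in [[DATE_YEAR]], starting at [[DATE_TIME]], where [[USER_NAME]] talks to an AI assistant called [[AI_NAME]].

    [[USER_NAME]]: Hello, [[AI_NAME]].
    [[AI_NAME]]: Hello [[USER_NAME]]! How may I help you today?
    [[USER_NAME]]:

Ending the template with "[[USER_NAME]]:" matches the script's --reverse-prompt "${USER_NAME}:" setting, so generation pauses whenever it is the user's turn to type.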
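
Because every setting falls back through ${VAR:-default}, the whole script can be configured from the environment, and any extra arguments are forwarded to ./main via "$@". A hypothetical invocation, assuming the script is saved as ./chatLLaMa (the name its own comment uses) next to the compiled main binary; the model path, user name and thread count below are example values, not taken from the diff:

    # Example (hypothetical values): override a few defaults via the environment
    # and append extra generation options, which the script forwards to ./main.
    MODEL=./models/7B/ggml-model-q4_0.gguf \
    USER_NAME=Alice \
    N_THREAD=4 \
    ./chatLLaMa --ctx_size 1024 --temp 0.8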