#!/bin/bash
#
# Interactive chat wrapper around ./main that keeps the running prompt and its
# prompt cache on disk.
#
# Required environment:
#   PROMPT_CACHE_FILE  prompt cache built from the initial prompt
#   CHAT_SAVE_DIR      directory holding logs, prompt files and working caches
# Optional environment:
#   MODEL, PROMPT_TEMPLATE, USER_NAME, AI_NAME (defaults below)
# Any command-line arguments are appended to the options passed to ./main.

set -euo pipefail

# Run from the parent of the directory containing this script so the relative
# paths below (./main, ./models, ./prompts) resolve.
cd "$(dirname "$0")/.." || exit

# `:-` (rather than `+x`) also rejects variables that are set but empty; an
# empty value would otherwise yield paths such as "/main.log" further down.
if [[ -z "${PROMPT_CACHE_FILE:-}" || -z "${CHAT_SAVE_DIR:-}" ]]; then
    echo >&2 "error: PROMPT_CACHE_FILE and CHAT_SAVE_DIR must be provided"
    exit 1
fi

MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}"
PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
USER_NAME="${USER_NAME:-User}"
AI_NAME="${AI_NAME:-ChatLLaMa}"
DATE_TIME="$(date +%H:%M)"
DATE_YEAR="$(date +%Y)"

LOG="${CHAT_SAVE_DIR}/main.log"
LOG_BG="${CHAT_SAVE_DIR}/main-bg.log"
CUR_PROMPT_FILE="${CHAT_SAVE_DIR}/current-prompt.txt"
CUR_PROMPT_CACHE="${CHAT_SAVE_DIR}/current-cache.bin"
NEXT_PROMPT_FILE="${CHAT_SAVE_DIR}/next-prompt.txt"
NEXT_PROMPT_CACHE="${CHAT_SAVE_DIR}/next-cache.bin"

# Extended regular expressions used with `grep -oE` on the ./main log.
# POSIX character classes are used because `\d` and `\s` are not part of ERE
# and are not understood consistently across grep implementations.
SESSION_SIZE_MSG_PATTERN='main: session file matches [[:digit:]]+ / [[:digit:]]+'
SAMPLE_TIME_MSG_PATTERN='sample time =[[:space:]]+[[:digit:]]+\.[[:digit:]]+ ms /[[:space:]]+[[:digit:]]+'
# sed script: delete from the first line starting with "<user>:", "<ai>:" or
# "..." through the end of the file.
SED_DELETE_MESSAGES="/^(${USER_NAME}:|${AI_NAME}:|\\.\\.\\.)/,\$d"

CTX_SIZE=2048
CTX_ROTATE_POINT=$((CTX_SIZE * 3 / 5)) # REVIEW
# Options shared by every ./main invocation; extra script arguments come last.
OPTS=(--model "$MODEL" --ctx_size "$CTX_SIZE" --repeat_last_n 256 "$@")

# An unbuffered equivalent of `tail -c+$(($1 + 1))`: discards the first $1
# bytes of stdin, then copies the rest to stdout one byte at a time so output
# appears as soon as it is produced.
# Arguments: $1 - number of leading bytes to drop
# Note: NUL is used as the read delimiter, so NUL bytes are not echoed.
skip_bytes() {
    local c
    # LC_ALL (not LANG) is forced to C: LC_ALL/LC_CTYPE take precedence over
    # LANG, and in a multibyte locale `read -n` would count characters
    # instead of bytes.
    LC_ALL=C IFS= read -r -n "$1" -d '' c
    while LC_ALL=C IFS= read -r -n 1 -d '' c; do
        printf '%s' "$c"
    done
}

# Escape the characters that are special on the replacement side of sed's
# s/// command (backslash, the '/' delimiter and '&') so an arbitrary name can
# be substituted literally.
# Arguments: $1 - text to escape
# Outputs:   escaped text on stdout
sed_escape_replacement() {
    printf '%s' "$1" | sed -e 's/[\/&]/\\&/g'
}

mkdir -p "$CHAT_SAVE_DIR"
echo >"$LOG"
# Show the end of the log on any exit. Single quotes defer expansion until the
# trap fires; the inner double quotes keep a path with whitespace intact.
trap 'tail -n100 "$LOG"' EXIT

# First run: instantiate the prompt template with names and date values.
if [[ ! -e "$CUR_PROMPT_FILE" ]]; then
    sed -e "s/\[\[USER_NAME\]\]/$(sed_escape_replacement "$USER_NAME")/g" \
        -e "s/\[\[AI_NAME\]\]/$(sed_escape_replacement "$AI_NAME")/g" \
        -e "s/\[\[DATE_TIME\]\]/${DATE_TIME}/g" \
        -e "s/\[\[DATE_YEAR\]\]/${DATE_YEAR}/g" \
        "$PROMPT_TEMPLATE" >"$CUR_PROMPT_FILE"
fi

# The "next" prompt starts as the current prompt with all messages removed.
# -E is the portable spelling of GNU sed's -r.
if [[ ! -e "$NEXT_PROMPT_FILE" ]]; then
    sed -E "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
fi

# Make sure the next prompt ends with the "..." marker line.
if [[ "$(tail -c4 "$NEXT_PROMPT_FILE")" != "..." ]]; then
    echo '...' >>"$NEXT_PROMPT_FILE"
fi

if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
    echo 'Prompt cache does not exist, building...'
    # Default batch_size to 8 here for better user feedback during initial prompt processing
    ./main 2>>"$LOG" \
        --batch_size 8 \
        "${OPTS[@]}" \
        --prompt-cache "$PROMPT_CACHE_FILE" \
        --file "$CUR_PROMPT_FILE" \
        --n_predict 1
    echo
    echo 'Done!'
fi

# Seed both working caches from the base prompt cache.
if [[ ! -e "$CUR_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$CUR_PROMPT_CACHE"
fi
if [[ ! -e "$NEXT_PROMPT_CACHE" ]]; then
    cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"
fi

# Show the conversation so far and leave the cursor ready for user input.
printf '%s ' "$(< "$CUR_PROMPT_FILE")"
n_tokens=0

# Main chat loop: one iteration per line of user input.
# -r keeps backslashes typed by the user literal; -e enables readline editing.
while read -r -e line; do
    # Limit generation to remaining context, with a buffer and estimating 2 chars/token for input
    n_predict=$((CTX_SIZE - n_tokens - ${#line} / 2 - 32))

    # Swap prompts when we're about to run out of context
    if ((n_predict <= 0)); then
        wait # for background main (below) to finish with next prompt
        mv "$NEXT_PROMPT_FILE" "$CUR_PROMPT_FILE"
        mv "$NEXT_PROMPT_CACHE" "$CUR_PROMPT_CACHE"

        # Start a fresh "next" prompt: current prompt minus messages, plus the
        # "..." marker, backed by a copy of the base prompt cache.
        sed -E "$SED_DELETE_MESSAGES" "$CUR_PROMPT_FILE" >"$NEXT_PROMPT_FILE"
        echo '...' >>"$NEXT_PROMPT_FILE"
        cp "$PROMPT_CACHE_FILE" "$NEXT_PROMPT_CACHE"

        n_tokens=0
        n_predict=$((CTX_SIZE / 2))
    fi

    # Record the user's message; past the rotate point it is mirrored into
    # the next prompt as well.
    echo " ${line}" >>"$CUR_PROMPT_FILE"
    if ((n_tokens > CTX_ROTATE_POINT)); then
        echo " ${line}" >>"$NEXT_PROMPT_FILE"
    fi

    # Byte length of the prompt before the AI prefix is appended; used below
    # to separate already-shown text from new output.
    n_prompt_len_pre=$(($(wc -c <"$CUR_PROMPT_FILE")))

    printf '%s: ' "$AI_NAME" >>"$CUR_PROMPT_FILE"

    ./main 2>>"$LOG" "${OPTS[@]}" \
        --prompt-cache "$CUR_PROMPT_CACHE" \
        --prompt-cache-all \
        --file "$CUR_PROMPT_FILE" \
        --reverse-prompt "${USER_NAME}:" \
        --n_predict "$n_predict" |
        skip_bytes 1 |                  # skip BOS token added by ./main
        tee "$CUR_PROMPT_FILE.tmp" |    # save prompt + generation to tmp file
        skip_bytes "$n_prompt_len_pre"  # print generation

    mv "$CUR_PROMPT_FILE.tmp" "$CUR_PROMPT_FILE"

    # if we hit n_predict instead of reverse-prompt, we need to add the prompt
    if [[ "$(tail -n1 "$CUR_PROMPT_FILE")" != "${USER_NAME}:" ]]; then
        printf '\n%s:' "$USER_NAME"
        printf '\n%s:' "$USER_NAME" >>"$CUR_PROMPT_FILE"
    fi

    printf ' '

    # HACK get num tokens from debug message
    # TODO get both messages in one go
    # `tail -n1` keeps only the most recent match so the arithmetic below
    # never sees multi-line input; with pipefail a grep miss still fails the
    # substitution.
    if ! session_size_msg="$(tail -n30 "$LOG" | grep -oE "$SESSION_SIZE_MSG_PATTERN" | tail -n1)" ||
        ! sample_time_msg="$(tail -n10 "$LOG" | grep -oE "$SAMPLE_TIME_MSG_PATTERN" | tail -n1)"; then
        echo >&2 "Couldn't get number of tokens from ./main output!"
        exit 1
    fi

    # Both messages carry the count of interest after the '/'.
    n_tokens=$(($(cut -d/ -f2 <<<"$session_size_msg") + $(cut -d/ -f2 <<<"$sample_time_msg")))

    # Past the rotate point, also append everything added this turn after the
    # user's message (AI prefix + generation) to the next prompt.
    if ((n_tokens > CTX_ROTATE_POINT)); then
        tail -c+$((n_prompt_len_pre + 1)) "$CUR_PROMPT_FILE" >>"$NEXT_PROMPT_FILE"
    fi

    # Update cache for next prompt in background, ideally during user input
    ./main >>"$LOG_BG" 2>&1 "${OPTS[@]}" \
        --prompt-cache "$NEXT_PROMPT_CACHE" \
        --file "$NEXT_PROMPT_FILE" \
        --n_predict 1 &
done
0 commit comments