diff --git a/Dockerfile b/Dockerfile index 255cfa811..2bad4bd5c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 +FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 RUN apt-get update RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y \ @@ -6,6 +6,7 @@ RUN DEBIAN_FRONTEND="noninteractive" apt-get install -y \ git \ htop \ tmux \ + file \ vim \ expect \ mpich \ diff --git a/README.md b/README.md index e762f6b0b..c526700fd 100644 --- a/README.md +++ b/README.md @@ -113,23 +113,22 @@ Under the hood, it uses Refact models and the best open-source models. At the moment, you can choose between the following models: -| Model | Completion | Chat | AI Toolbox | Fine-tuning | -|--------------------------------------------------------------------------------------|------------|------|------------|-------------| -| [Refact/1.6B](https://huggingface.co/smallcloudai/Refact-1_6B-fim) | + | + | | + | -| [starcoder/1b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | + | -| [starcoder/3b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | + | -| [starcoder/7b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | + | -| [starcoder/15b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | | -| [starcoder/15b/plus](https://huggingface.co/TheBloke/starcoderplus-GPTQ) | + | | | | -| [wizardcoder/15b](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GPTQ) | + | | | | -| [codellama/7b](https://huggingface.co/TheBloke/CodeLlama-7B-fp16) | + | | | | -| [starchat/15b/beta](https://huggingface.co/TheBloke/starchat-beta-GPTQ) | | + | | | -| [wizardlm/7b](https://huggingface.co/TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ) | | + | | | -| [wizardlm/13b](https://huggingface.co/TheBloke/WizardLM-13B-V1.1-GPTQ) | | + | | | -| [wizardlm/30b](https://huggingface.co/TheBloke/WizardLM-30B-GPTQ) | | + | | | -| [llama2/7b](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GPTQ) | | + | | | -| [llama2/13b](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ) | | + | | | - +| Model | Completion | Chat | AI Toolbox | Fine-tuning | +|---------------------------------------------------------------------------------|------------|------|------------|-------------| +| [Refact/1.6B](https://huggingface.co/smallcloudai/Refact-1_6B-fim) | + | + | | + | +| [starcoder/1b/base](https://huggingface.co/smallcloudai/starcoderbase-1b) | + | | | + | +| [starcoder/3b/base](https://huggingface.co/smallcloudai/starcoderbase-3b) | + | | | + | +| [starcoder/7b/base](https://huggingface.co/smallcloudai/starcoderbase-7b) | + | | | + | +| [starcoder/15b/base](https://huggingface.co/TheBloke/starcoder-GPTQ) | + | | | | +| [starcoder/15b/plus](https://huggingface.co/TheBloke/starcoderplus-GPTQ) | + | | | | +| [wizardcoder/15b](https://huggingface.co/TheBloke/WizardCoder-15B-1.0-GPTQ) | + | | | | +| [codellama/7b](https://huggingface.co/TheBloke/CodeLlama-7B-fp16) | + | | | | +| [starchat/15b/beta](https://huggingface.co/TheBloke/starchat-beta-GPTQ) | | + | | | +| [wizardlm/7b](https://huggingface.co/TheBloke/WizardLM-7B-V1.0-Uncensored-GPTQ) | | + | | | +| [wizardlm/13b](https://huggingface.co/TheBloke/WizardLM-13B-V1.1-GPTQ) | | + | | | +| [wizardlm/30b](https://huggingface.co/TheBloke/WizardLM-30B-fp16) | | + | | | +| [llama2/7b](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GPTQ) | | + | | | +| [llama2/13b](https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ) | | + | | | ## Usage @@ -141,10 +140,6 @@ Q: Can I run a model on CPU? 
A: it doesn't run on CPU yet, but it's certainly possible to implement this. -Q: Sharding is disabled, why? - -A: It's not ready yet, but it's coming soon. - ## Community & Support - Contributing [CONTRIBUTING.md](CONTRIBUTING.md) diff --git a/known_models_db/refact_known_models/huggingface.py b/known_models_db/refact_known_models/huggingface.py index f86791a0d..eefc0fc5e 100644 --- a/known_models_db/refact_known_models/huggingface.py +++ b/known_models_db/refact_known_models/huggingface.py @@ -31,35 +31,32 @@ }, "starcoder/1b/base": { "backend": "transformers", - "model_path": "bigcode/starcoderbase-1b", + "model_path": "smallcloudai/starcoderbase-1b", "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 6000, "T": 4096, - "hidden": True, "filter_caps": ["completion", "finetune"], }, "starcoder/3b/base": { "backend": "transformers", - "model_path": "bigcode/starcoderbase-3b", + "model_path": "smallcloudai/starcoderbase-3b", "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 9000, "T": 4096, - "hidden": True, "filter_caps": ["completion", "finetune"], }, "starcoder/7b/base": { "backend": "transformers", - "model_path": "bigcode/starcoderbase-7b", + "model_path": "smallcloudai/starcoderbase-7b", "diff_scratchpad_class": "refact_scratchpads:ScratchpadPSM", "chat_scratchpad_class": None, "model_class_kwargs": {}, "required_memory_mb": 18000, "T": 2048, - "hidden": True, "filter_caps": ["completion", "finetune"], }, "wizardcoder/15b": { diff --git a/metrics/measure_humaneval_continue.py b/metrics/measure_humaneval_continue.py new file mode 100644 index 000000000..a8b5fb5c2 --- /dev/null +++ b/metrics/measure_humaneval_continue.py @@ -0,0 +1,86 @@ +import sys, termcolor, subprocess, json, time +from copy import deepcopy +from mpi4py import MPI +from human_eval.data import write_jsonl, read_problems +import requests + + +#MODEL = "smallcloudai/Refact-1_6B-fim" +MODEL = "Refact/1.6B" + +TEMPERATURE = 0.2 +TOP_P = 0.95 +TIMES = 1 +MAX_TOKENS = 256 + + +def run_completion_call(src_txt): + res = requests.post("http://127.0.0.1:8008/v1/completions", json={ + "model": MODEL, + "max_tokens": MAX_TOKENS, + "stream": False, + "echo": True, + "top_p": TOP_P, + "temperature": TEMPERATURE, + "prompt": src_txt, + "stop": ["\n\n\n"], + }) + res.raise_for_status() + j = res.json() + # print(j) + return j["choices"][0]["text"] + + +def test_by_continuing(comm, case): + orig = case["prompt"].rstrip() + print_me = termcolor.colored(orig[:-1], "yellow") + if comm.size == 1: + print(print_me) + t = run_completion_call(orig) + uncut = t + lines = t.split("\n") + filtered = [] + for x in lines: + if x.startswith(" ") or x.strip() == "": + filtered.append(x) + elif not x.startswith(" "): + break + t = "\n".join(filtered) + assert uncut.startswith(t) + print_response = termcolor.colored(t, "green") + " " + termcolor.colored(uncut[len(t):], attrs=["dark"]) + if comm.size == 1: + print(print_response) + else: + print(print_me + "\n" + print_response) + case["completion"] = t + + +if __name__ == "__main__": + postfix = "" + if len(sys.argv) > 1: + postfix = sys.argv[1] + t0 = time.time() + problems = list(read_problems().values()) * TIMES + comm = MPI.COMM_WORLD + my_problems =
problems[comm.rank::comm.size] + output = [] + for i, case_ in enumerate(my_problems): + case = deepcopy(case_) + print("-" * 40, " rank=%i case=%i" % (comm.rank, i), "-" * 40) + test_by_continuing(comm, case) + output.append(case) + comm.barrier() + t1 = time.time() + tmp = comm.gather(output, root=0) + if comm.rank == 0: + all_output = [x for y in tmp for x in y] + output_name = "human-%s%s.jsonl" % ("continue", postfix) + write_jsonl(output_name, all_output) + res = subprocess.check_output(f"evaluate_functional_correctness {output_name}", shell=True) + metrics = json.loads(res.decode('utf-8').strip().split('\n')[-1].replace("'", '"')) + print(termcolor.colored(metrics, "magenta")) + tmp = "method=%s temperature=%0.2f top_p=%0.2f postfix='%s' world=%i times=%i %s %0.2fs %s\n" % ( + "continue", TEMPERATURE, TOP_P, postfix, comm.size, TIMES, metrics, (t1 - t0), MODEL) + with open("human-eval-all-results.txt", "a") as f: + f.write(tmp) + print(tmp) diff --git a/refact_data_pipeline/finetune/finetune_utils.py b/refact_data_pipeline/finetune/finetune_utils.py index 70ddfa6c2..e65293a4d 100644 --- a/refact_data_pipeline/finetune/finetune_utils.py +++ b/refact_data_pipeline/finetune/finetune_utils.py @@ -73,13 +73,24 @@ def get_active_loras(models_db: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: active_loras = { legacy_finetune_model: active_loras, } + + def get_active_lora(model_name: str, model_info: Dict[str, Any]) -> Dict: + finetune_model = model_info.get("finetune_model", model_name) + if finetune_model not in active_loras: + return {} + else: + return { + **active_loras[finetune_model], + "model": model_name + } + return { model_name: { "lora_mode": "latest-best", - **active_loras.get(model_name, {}), + **get_active_lora(model_name, model_info), } for model_name, model_info in models_db.items() - if "finetune" in model_info["filter_caps"] + if "finetune_model" in model_info or "finetune" in model_info["filter_caps"] } diff --git a/refact_scratchpads/scratchpad_completion.py b/refact_scratchpads/scratchpad_completion.py index 37d560be1..2257a3afd 100644 --- a/refact_scratchpads/scratchpad_completion.py +++ b/refact_scratchpads/scratchpad_completion.py @@ -28,19 +28,19 @@ def after_token_selection( self.needs_upload = True self._tokens.append(chosen_token.item()) if chosen_token == self.enc.EOT: - self.finish_reason = "eot" + self.finish_reason = "stop-eot" if chosen_token in self.stop_tokens: - self.finish_reason = "stoptoken" + self.finish_reason = "stop-token" if len(self._tokens) > 3: if self.stop_lf_lf and self._tokens[-1] == self.enc.LF and self._tokens[-2] == self.enc.LF: - self.finish_reason = "ins-stop-lflf" + self.finish_reason = "stop-lflf" if self.stop_lf_lf_lf: if self._tokens[-3] == self.enc.LF and self._tokens[-2] == self.enc.LF and self._tokens[-1] == self.enc.LF: - self.finish_reason = "ins-stop-lflflf" + self.finish_reason = "stop-lflflf" elif self._tokens[-2] == self.enc.LFLF and self._tokens[-1] == self.enc.LF: - self.finish_reason = "ins-stop-lflflf" + self.finish_reason = "stop-lflflf" elif self._tokens[-2] == self.enc.LFLF and self._tokens[-1] == self.enc.LFLF: - self.finish_reason = "ins-stop-lflflf" + self.finish_reason = "stop-lflflf" return dict() def prompt(self, T: int): diff --git a/refact_scratchpads/scratchpad_hf.py b/refact_scratchpads/scratchpad_hf.py index 6e13d3657..add929dad 100644 --- a/refact_scratchpads/scratchpad_hf.py +++ b/refact_scratchpads/scratchpad_hf.py @@ -72,14 +72,14 @@ def after_token_selection(self, m, chosen_token: th.Tensor, **unused) 
-> Dict[st t = chosen_token.item() if t in [self._tokenizer.eos_token_id]: - self.finish_reason = "eot" + self.finish_reason = "stop-eot" elif t in self._special_tokens: - self.finish_reason = "special-token" + self.finish_reason = "stop-special-token" if not self.finish_reason: self._completion.append(t) if t in self._stop_tokens: - self.finish_reason = "stoptoken" + self.finish_reason = "stop-token" couple_of_tokens_decoded = self._tokenizer.decode(([self._prev_token] if self._prev_token is not None else []) + [t]) self._prev_token = t diff --git a/self_hosting_machinery/inference/inference_hf.py b/self_hosting_machinery/inference/inference_hf.py index 8e42dbfef..5a522f247 100644 --- a/self_hosting_machinery/inference/inference_hf.py +++ b/self_hosting_machinery/inference/inference_hf.py @@ -268,7 +268,7 @@ def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy self._model.generate(**generation_kwargs) if not scratchpad.finish_reason: - scratchpad.finish_reason = "maxlen" + scratchpad.finish_reason = "length" upload_proxy_args["ts_batch_finished"] = time.time() upload_proxy.upload_result( **upload_proxy_args, diff --git a/self_hosting_machinery/inference/inference_legacy.py b/self_hosting_machinery/inference/inference_legacy.py index bd90c475d..3837edaa5 100644 --- a/self_hosting_machinery/inference/inference_legacy.py +++ b/self_hosting_machinery/inference/inference_legacy.py @@ -233,7 +233,7 @@ def _generate_using_scratchpad(self, break if not scratchpad.finish_reason: - scratchpad.finish_reason = "maxlen" + scratchpad.finish_reason = "length" def infer(self, request: Dict[str, Any], upload_proxy: UploadProxy, upload_proxy_args: Dict): request_id = request["id"] diff --git a/self_hosting_machinery/scripts/best_lora.py b/self_hosting_machinery/scripts/best_lora.py index 88e903381..1032105cc 100644 --- a/self_hosting_machinery/scripts/best_lora.py +++ b/self_hosting_machinery/scripts/best_lora.py @@ -1,12 +1,46 @@ import re import os import json -from typing import Dict from self_hosting_machinery import env from refact_data_pipeline.finetune.finetune_utils import get_run_model_name from refact_data_pipeline.finetune.finetune_utils import default_finetune_model +from typing import Dict, Optional + + +def find_best_checkpoint(run_id: str) -> Dict[str, str]: + run_dir = os.path.join(env.DIR_LORAS, run_id) + if not os.path.isdir(run_dir): + raise RuntimeError(f"run_id not found") + checkpoints_dir = os.path.join(run_dir, "checkpoints") + if not os.path.isdir(checkpoints_dir): + raise RuntimeError(f"run_id has no checkpoints") + + def checkpoint_name_to_loss(checkpoint_id: str) -> Optional[float]: + match = re.match(r"iter(\d+)-testloss(\d+\.\d+)", checkpoint_id) + if match is None: + return None + return float(match.group(2)) + + checkpoints = list(filter(lambda x: x[0] is not None and os.path.isdir(x[1]), [ + ( + checkpoint_name_to_loss(checkpoint_id), + os.path.join(checkpoints_dir, checkpoint_id), + checkpoint_id, + ) + for checkpoint_id in os.listdir(checkpoints_dir) + ])) + + if not checkpoints: + raise RuntimeError(f"run_id has no valid checkpoints") + + best_checkpoint = min(checkpoints, key=lambda x: x[0]) + return { + "best_checkpoint_id": best_checkpoint[2], + "path": best_checkpoint[1], + } + def find_best_lora(model_name: str) -> Dict[str, str]: error = "no completed runs found" @@ -74,4 +108,10 @@ def find_best_lora(model_name: str) -> Dict[str, str]: parser.add_argument("--model", type=str, default=default_finetune_model) args = parser.parse_args() - 
print(find_best_lora(args.model)) + best_lora = find_best_lora(args.model) + try: + best_checkpoint = find_best_checkpoint(best_lora["latest_run_id"]) + except RuntimeError as e: + best_checkpoint = None + print("Best LoRA", best_lora) + print("Best checkpoint", best_checkpoint) diff --git a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py index cb8659803..debaade97 100644 --- a/self_hosting_machinery/webgui/selfhost_fastapi_completions.py +++ b/self_hosting_machinery/webgui/selfhost_fastapi_completions.py @@ -232,16 +232,40 @@ def __init__(self, timeout: int = 30, *args, **kwargs): super().__init__(*args, **kwargs) + + # API for direct FIM and Chat usage self.add_api_route("/login", self._login, methods=["GET"]) self.add_api_route("/secret-key-activate", self._secret_key_activate, methods=["GET"]) - self.add_api_route("/completions", self._completions, methods=["POST"]) self.add_api_route("/contrast", self._contrast, methods=["POST"]) self.add_api_route("/chat", self._chat, methods=["POST"]) + + # API for LSP server + self.add_api_route("/coding_assistant_caps.json", self._coding_assistant_caps, methods=["GET"]) + self.add_api_route("/completions", self._completions, methods=["POST"]) + self._inference_queue = inference_queue self._id2ticket = id2ticket self._model_assigner = model_assigner self._timeout = timeout + async def _coding_assistant_caps(self): + code_completion_default_model, _ = completion_resolve_model(self._inference_queue) + return { + "cloud_name": "Refact Self-Hosted", + "endpoint_template": f"{self.prefix}/completions", + "endpoint_style": "openai", + "telemetry_basic_dest": "", + "running_models": self._inference_queue.models_available(), + "code_completion_default_model": code_completion_default_model, + "code_chat_default_model": "", + "tokenizer_path_template": "https://huggingface.co/$MODEL/resolve/main/tokenizer.json", + "tokenizer_rewrite_path": { + model: self._model_assigner.models_db[model]["model_path"] + for model in self._inference_queue.models_available() + if model in self._model_assigner.models_db + }, + } + async def _login(self): longthink_functions = dict() longthink_filters = set() @@ -297,7 +321,7 @@ async def _secret_key_activate(self): async def _completions(self, post: NlpCompletion, account: str = "XXX"): ticket = Ticket("comp-") req = post.clamp() - model_name, err_msg = completion_resolve_model(self._inference_queue) + model_name, err_msg = static_resolve_model(post.model, self._inference_queue) if err_msg: log("%s model resolve \"%s\" -> error \"%s\" from %s" % (ticket.id(), post.model, err_msg, account)) raise HTTPException(status_code=400, detail=err_msg) @@ -315,7 +339,10 @@ async def _completions(self, post: NlpCompletion, account: str = "XXX"): self._id2ticket[ticket.id()] = ticket await q.put(ticket) seen = [""] * post.n - return StreamingResponse(completion_streamer(ticket, post, self._timeout, seen, req["created"])) + return StreamingResponse( + completion_streamer(ticket, post, self._timeout, seen, req["created"]), + media_type=("text/event-stream" if post.stream else "application/json"), + ) async def _contrast(self, post: DiffCompletion, request: Request, account: str = "XXX"): if post.function != "diff-anywhere": diff --git a/self_hosting_machinery/webgui/selfhost_model_assigner.py b/self_hosting_machinery/webgui/selfhost_model_assigner.py index 9077a5783..0ffd760ec 100644 --- a/self_hosting_machinery/webgui/selfhost_model_assigner.py +++ 
b/self_hosting_machinery/webgui/selfhost_model_assigner.py @@ -8,6 +8,8 @@ from self_hosting_machinery.webgui.selfhost_webutils import log from known_models_db.refact_known_models import models_mini_db from known_models_db.refact_toolbox_db import modelcap_records +from self_hosting_machinery.scripts.best_lora import find_best_lora +from refact_data_pipeline.finetune.finetune_utils import get_active_loras from typing import List, Dict, Set, Any @@ -205,12 +207,28 @@ def _capabilities(func_type: str) -> Set: chat_caps = _capabilities("chat") toolbox_caps = _capabilities("toolbox") + active_loras = get_active_loras(self.models_db) for k, rec in self.models_db.items(): if rec.get("hidden", False): continue + finetune_info = None + if k in active_loras: + lora_mode = active_loras[k]["lora_mode"] + latest_best_lora_info = find_best_lora(rec.get("finetune_model", k)) + if lora_mode == "latest-best" and latest_best_lora_info["latest_run_id"]: + finetune_info = { + "run": latest_best_lora_info["latest_run_id"], + "checkpoint": latest_best_lora_info["best_checkpoint_id"], + } + elif lora_mode == "specific" and active_loras[k].get("specific_lora_run_id", ""): + finetune_info = { + "run": active_loras[k]["specific_lora_run_id"], + "checkpoint": active_loras[k]["specific_checkpoint"], + } info.append({ "name": k, "backend": rec["backend"], + "finetune_info": finetune_info, "has_completion": bool("completion" in rec["filter_caps"]), "has_finetune": bool("finetune" in rec["filter_caps"]), "has_toolbox": bool(toolbox_caps.intersection(rec["filter_caps"])), diff --git a/self_hosting_machinery/webgui/static/components/modals/modal-upload-files.js b/self_hosting_machinery/webgui/static/components/modals/modal-upload-files.js new file mode 100644 index 000000000..a7a33c03c --- /dev/null +++ b/self_hosting_machinery/webgui/static/components/modals/modal-upload-files.js @@ -0,0 +1,328 @@ + +let modal_html = ` + +` + +let gl_insert_in_el; +let gl_open_on_click_el; +let gl_open_on_click_el_default_html; +let gl_modal; + +function make_modal_backdrop_static() { + gl_modal._config.backdrop = 'static'; +} + +// Function to make the modal backdrop responsive to outside clicks +function make_modal_backdrop_responsive() { + gl_modal._config.backdrop = 'true'; +} + + +export async function init( + insert_in_el, + open_on_click_el, + modal_label, + default_tab, + submit_link_endpoint, + submit_input_endpoint, + text_on_progress_done, + link_placeholder, + input_help_text +) { + gl_insert_in_el = insert_in_el; + gl_open_on_click_el = open_on_click_el; + gl_open_on_click_el_default_html = open_on_click_el.innerHTML; + insert_in_el.innerHTML = modal_html; + + gl_modal = new bootstrap.Modal(document.getElementById('updlg-modal')); + insert_in_el.querySelector('#updlg-modal-label').innerText = modal_label; + + if (default_tab === 'link') { + insert_in_el.querySelector('#updlg-nav-tab-link').classList.add('active', 'main-active') + insert_in_el.querySelector('#updlg-link-div').classList.add('show', 'active') + } else if (default_tab === 'input') { + insert_in_el.querySelector('#updlg-nav-tab-input').classList.add('active','main-active') + insert_in_el.querySelector('#updlg-input-div').classList.add('show', 'active') + } else { + console.log(`default tab ${default_tab} is not implemented!`); + } + if (link_placeholder) { + insert_in_el.querySelector('#updlg-link').placeholder = link_placeholder; + } + if (input_help_text) { + insert_in_el.querySelector('.ssh-info').innerText = input_help_text; + } + + 
open_on_click_el.addEventListener('click', () => { + show_modal(); + }); + + add_nav_btn_click_handlers(); + add_submit_handler(submit_link_endpoint, submit_input_endpoint, text_on_progress_done); +} + + +export function switch_away(el) { + el.innerHTML = ''; +} + +function add_nav_btn_click_handlers() { + const file_modal = document.getElementById('updlg-modal'); + + const btns_nav_upload_files = file_modal.querySelectorAll('button.updlg-nav') + const panes_upload_files = file_modal.querySelectorAll('.updlg-pane-modal'); + btns_nav_upload_files.forEach( + el => el.addEventListener('click', () => { + if (!el.classList.contains('active')) { + btns_nav_upload_files.forEach(el => el.classList.remove('active', 'main-active')); + el.classList.add('active', 'main-active'); + panes_upload_files.forEach(el => el.classList.remove('show', 'active')); + file_modal.querySelector(`#${el.dataset.toggle}`).classList.add('show', 'active') + } + }) + ); +} + + +function add_submit_handler(submit_link_endpoint, submit_input_endpoint, text_on_progress_done) { + const file_modal = document.getElementById('updlg-modal'); + function get_upload_method() { + const btns = file_modal.querySelectorAll('button.updlg-nav') + for (const btn of btns) { + if (btn.classList.contains('active')) { + return btn.dataset.method; + } + } + } + + const upload_files_submit = file_modal.querySelector('#updlg-modal-submit') + + upload_files_submit.addEventListener('click', () => { + const upload_method = get_upload_method(); + if (upload_method === 'link') { + upload_url(submit_link_endpoint); + } + else if (upload_method === 'input') { + upload_file(submit_input_endpoint, text_on_progress_done); + } else { + console.log(`upload method ${upload_method} is not implemented!`); + } + }); +} + + +function reset_modal_fields() { + const file_modal = document.getElementById('updlg-modal'); + + gl_open_on_click_el.innerHTML = gl_open_on_click_el_default_html; + file_modal.querySelector('#updlg-input').value = ''; + file_modal.querySelector('#updlg-modal-submit').disabled = false; + file_modal.querySelector('#updlg-nav-tab-link').disabled = false; + file_modal.querySelector('#updlg-link').disabled = false; + file_modal.querySelector('#updlg-input').disabled = false; + file_modal.querySelector('#updlg-file-upload-progress-bar').setAttribute('aria-valuenow', '0'); + file_modal.querySelector('#updlg-file-upload-progress').classList.add('d-none'); + file_modal.querySelector('#updlg-loaded_n_total').innerHTML = ""; + file_modal.querySelector('#updlg-status').innerHTML = ""; + file_modal.querySelector('#updlg-link').value = ""; + file_modal.querySelector('#updlg-100-spinner').hidden = true; + file_modal.querySelector('#updlg-nav-tab-input').disabled = false; + make_modal_backdrop_responsive(); + file_modal.querySelector('#updlg-modal-close').disabled = false; +} + +function hide_modal() { + bootstrap.Modal.getOrCreateInstance(document.getElementById('updlg-modal')).hide(); +} + +function show_modal() { + bootstrap.Modal.getOrCreateInstance(document.getElementById('updlg-modal')).show(); +} + + +function prepare_for_upload(upload_method) { + const file_modal = document.getElementById('updlg-modal'); + const process_button = file_modal.querySelector('#updlg-modal-submit'); + + file_modal.querySelector('#updlg-modal-close').disabled = true; + process_button.disabled = true; + process_button.dataset.loading = 'true'; + make_modal_backdrop_static(); + + if (gl_open_on_click_el.innerHTML === gl_open_on_click_el_default_html) { + 
gl_open_on_click_el.innerHTML = ` Uploading`; + } + + if (upload_method === 'link') { + file_modal.querySelector('#updlg-nav-tab-input').disabled = true; + file_modal.querySelector('#updlg-link').disabled = true; + file_modal.querySelector('#updlg-100-spinner').hidden = false; + file_modal.querySelector('#updlg-status').innerText = 'Uploading file. Please wait...' + } else if (upload_method === 'input') { + file_modal.querySelector('#updlg-nav-tab-link').disabled = true; + file_modal.querySelector('#updlg-input').disabled = true; + } +} + + +function upload_file(endpoint, text_on_progress_done) { + const file_modal = document.getElementById('updlg-modal'); + const file_input = file_modal.querySelector('#updlg-input'); + const file_upload_progress = file_modal.querySelector('#updlg-file-upload-progress'); + const progress_bar = file_modal.querySelector('#updlg-file-upload-progress-bar'); + const upload_files_status = file_modal.querySelector('#updlg-status'); + + function progress_handler(event) { + prepare_for_upload('input'); + file_modal.querySelector('#updlg-loaded_n_total').innerText = "Uploaded " + event.loaded + " bytes of " + event.total; + let percent = (event.loaded / event.total) * 100; + progress_bar.setAttribute('aria-valuenow', Math.round(percent).toString()); + progress_bar.style.width = Math.round(percent).toString() + "%"; + upload_files_status.innerText = Math.round(percent).toString() + "% uploaded... please wait"; + if (Math.round(percent) >= 100) { + upload_files_status.innerText = text_on_progress_done; + file_modal.querySelector('#updlg-100-spinner').hidden = false; + } + } + + function complete_handler(event) { + upload_files_status.innerText = event.target.responseText; + + if(event.target.status === 200) { + setTimeout(() => { + reset_modal_fields(); + hide_modal(); + }, 500); + } else { + let error_msg = JSON.parse(event.target.responseText); + reset_modal_fields(); + upload_files_status.innerText = error_msg.detail; + } + } + + function error_handler(event) { + upload_files_status.innerText = event.target.responseText.message; + } + + function abort_handler() { + upload_files_status.innerText = "Upload Aborted"; + } + + if (file_input.files.length === 0) { + return; + } + let formdata = new FormData(); + formdata.append("file", file_input.files[0]); + file_upload_progress.classList.toggle('d-none'); + let ajax = new XMLHttpRequest(); + ajax.upload.addEventListener("progress", progress_handler, false); + ajax.addEventListener("load", complete_handler, false); + ajax.addEventListener("error", error_handler, false); + ajax.addEventListener("abort", abort_handler, false); + ajax.open("POST", endpoint); + ajax.send(formdata); +} + + +function upload_url(endpoint) { + const file_modal = document.getElementById('updlg-modal'); + + function handle_invalid_url() { + const error = new Error('Invalid URL'); + file_modal.querySelector('#updlg-status').innerText = error.message; + } + + const file_input = file_modal.querySelector('#updlg-link'); + if (!file_input || file_input.value === '') { + return; + } + + const url_regex = /^(ftp|http|https):\/\/[^ "]+$/; + const is_url = url_regex.test(file_input.value); + if (!is_url) { + handle_invalid_url(); + return; + } + + let formData = { + 'url': file_input.value + }; + + prepare_for_upload('link') + fetch(endpoint, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(formData) + }) + .then( + response => { + if (!response.ok) { + return response.json() + .then((response) => { + throw 
new Error(response['detail']); + }); + } + reset_modal_fields(); + return response.json(); + }) + .then( + () => { + reset_modal_fields(); + hide_modal(); + }) + .catch( + error => { + reset_modal_fields(); + file_modal.querySelector('#updlg-status').innerText = error.message; + }); +} diff --git a/self_hosting_machinery/webgui/static/style.css b/self_hosting_machinery/webgui/static/style.css index ab2a8cd45..3b6f322a8 100644 --- a/self_hosting_machinery/webgui/static/style.css +++ b/self_hosting_machinery/webgui/static/style.css @@ -695,7 +695,10 @@ h3 { opacity: 1 !important; pointer-events: auto; } -.table-checkpoints tr td:first-of-type { +.table-checkpoints tr td:nth-of-type(1) { + width: 30px; +} +.table-checkpoints tr td:nth-of-type(2) { width: 30px; } .use-model-pane { diff --git a/self_hosting_machinery/webgui/static/tab-finetune.html b/self_hosting_machinery/webgui/static/tab-finetune.html index db8b381b1..0d0a65100 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.html +++ b/self_hosting_machinery/webgui/static/tab-finetune.html @@ -1,8 +1,10 @@
+

Use Finetuned Model

+
Model:
Completed Runs - No runs yet. + No runs yet.
+
@@ -298,9 +301,6 @@ -
- Optional. If not specified, the default branch will be used. -
@@ -385,4 +385,23 @@
Limit training time
+ + \ No newline at end of file diff --git a/self_hosting_machinery/webgui/static/tab-finetune.js b/self_hosting_machinery/webgui/static/tab-finetune.js index d339c80d1..86188d01a 100644 --- a/self_hosting_machinery/webgui/static/tab-finetune.js +++ b/self_hosting_machinery/webgui/static/tab-finetune.js @@ -1,4 +1,6 @@ import { general_error } from './error.js'; +import { init as init_upload_files_modal, switch_away as upload_files_modal_switch_away } from './components/modals/modal-upload-files.js' + let logs_streamer_run_id = ""; let gfx_showing_run_id = ""; @@ -52,7 +54,6 @@ function tab_finetune_config_and_runs() { return response.json(); }) .then(function (data) { - console.log('tab-finetune-config-and-runs',data); finetune_configs_and_runs = data; render_runs(); render_model_select(); @@ -116,6 +117,53 @@ function render_finetune_settings(data = {}) { } } +function run_checked(run_id) { + if (gfx_showing_run_id != run_id) { + gfx_showing_run_id = run_id; + const timestamp = new Date().getTime(); + const gfx = document.querySelector('.fine-gfx'); + gfx.src = `/tab-finetune-progress-svg/${run_id}?t=${timestamp}`; + } + start_log_stream(run_id); + render_checkpoints(find_checkpoints_by_run(run_id)); + + const log_link = document.querySelector('.log-link'); + if (log_link && log_link.classList.contains('d-none')) { + log_link.classList.remove('d-none'); + } + if (log_link) { + log_link.href = `/tab-finetune-log/${run_id}`; + } +} + +function finetune_activate_run(run_id, checkpoint) { + const finetune_run = finetune_configs_and_runs.finetune_runs.find((run) => run.run_id === run_id); + if (!finetune_run) { + return; + } + if (!checkpoint) { + checkpoint = finetune_run["best_checkpoint"]["best_checkpoint_id"]; + } + if (finetune_run.model_name !== finetune_configs_and_runs.completion_model.finetune) { + const modal = document.getElementById('finetune-tab-model-warning-modal'); + const modal_instance = bootstrap.Modal.getOrCreateInstance(modal); + let warning_text = ` + This fine-tuning checkpoint is for ${finetune_run.model_name} base model. + Your currently active model is ${finetune_configs_and_runs.completion_model.name}, + you can change it in the Model Hosting tab. + `; + if (!finetune_configs_and_runs.completion_model.name) { + warning_text = `Choose completion model first to activate checkpoint.`; + } + document.querySelector('#finetune-tab-model-warning-modal #model-warning-message').innerHTML = ` + + `; + modal_instance.show(); + } else if (checkpoint) { + finetune_switch_activate("specific", run_id, checkpoint); + } +} + function render_runs() { const runs_table = document.querySelector('.run-table'); if(finetune_configs_and_runs.finetune_runs.length === 0) { @@ -135,6 +183,7 @@ function render_runs() { const run_minutes = document.createElement("td"); const run_steps = document.createElement("td"); const run_active = document.createElement("td"); + const run_download = document.createElement("td"); const run_delete = document.createElement("td"); run_name.innerHTML = `
${run.run_id}${run.model_name}
` @@ -171,12 +220,34 @@ function render_runs() { const item_disabled = run_is_working ? "disabled" : "" run_delete.innerHTML = ``; - run_active.innerHTML = ``; + if (find_checkpoints_by_run(run.run_id).length > 0) { + run_active.innerHTML = ` + `; + run_download.innerHTML = ` + + + `; + if (!run_is_working) { + run_active.addEventListener('click', (event) => { + event.stopPropagation(); + finetune_activate_run(run_table_row.dataset.run); + }); + run_download.addEventListener('click', (event) => { + event.stopPropagation(); + }); + } + } else { + run_active.innerHTML = ``; + } run_table_row.appendChild(run_name); run_table_row.appendChild(run_status); run_table_row.appendChild(run_minutes); run_table_row.appendChild(run_steps); run_table_row.appendChild(run_active); + run_table_row.appendChild(run_download); run_table_row.appendChild(run_delete); if (!run_is_working) { @@ -188,33 +259,12 @@ function render_runs() { let delete_lora_modal_instance = bootstrap.Modal.getOrCreateInstance(delete_lora_modal); delete_lora_modal_instance.show(); }); - run_active.addEventListener('click', (event) => { - event.stopPropagation(); - const lora_for_start = run_table_row.dataset.run; - console.log('lora_for_start', lora_for_start); - finetune_switch_activate("latest-best", lora_for_start); - }); } runs_table.appendChild(run_table_row); if (selected_lora == run.run_id) { run_table_row.classList.add('table-success'); - if (gfx_showing_run_id != run.run_id) { - gfx_showing_run_id = run.run_id; - const timestamp = new Date().getTime(); - const gfx = document.querySelector('.fine-gfx'); - gfx.src = `/tab-finetune-progress-svg/${run.run_id}?t=${timestamp}`; - } - start_log_stream(run.run_id); - render_checkpoints(find_checkpoints_by_run(run.run_id)); - - const log_link = document.querySelector('.log-link'); - if(log_link && log_link.classList.contains('d-none')) { - log_link.classList.remove('d-none'); - } - if(log_link) { - log_link.href = `/tab-finetune-log/${run.run_id}`; - } + run_checked(run.run_id); } // if(is_working) { // start_finetune_button.innerHTML = '
' + 'Stop'; @@ -231,7 +281,7 @@ function render_runs() { event.stopPropagation(); const run_id = this.dataset.run; selected_lora = run_id; - render_checkpoints(find_checkpoints_by_run(run_id)); + run_checked(run_id); }); }); } @@ -251,7 +301,7 @@ function delete_run(run_id) { } const gfx = document.querySelector('.fine-gfx'); gfx.src = `/tab-finetune-progress-svg/none`; - const log_container = document.querySelector('.tab-upload-finetune-logs'); + const log_container = document.querySelector('.tab-upload-finetune-logs'); if (log_container) { log_container.innerHTML = ''; } @@ -272,7 +322,13 @@ const find_checkpoints_by_run = (run_id) => { }; function render_lora_switch() { - let mode = finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name] ? finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name].lora_mode : "latest-best"; + const model_name = finetune_configs_and_runs.completion_model.name; + const finetune_model = finetune_configs_and_runs.completion_model.finetune; + let lora_switch_model = document.querySelector('#lora-switch-model'); + lora_switch_model.innerHTML = ` + Model: ${model_name} + `; + let mode = finetune_configs_and_runs.active[finetune_model] ? finetune_configs_and_runs.active[finetune_model].lora_mode : "latest-best"; loras_switch_no_reaction = true; // avoid infinite loop when setting .checked if (mode === 'off') { loras_switch_off.checked = true; @@ -289,8 +345,8 @@ function render_lora_switch() { lora_switch_checkpoint.style.display = 'block'; lora_switch_run_id.style.opacity = 1; lora_switch_checkpoint.style.opacity = 1; - lora_switch_run_id.innerHTML = `Run: ${finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name].specific_lora_run_id}`; - lora_switch_checkpoint.innerHTML = `Checkpoint: ${finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name].specific_checkpoint}`; + lora_switch_run_id.innerHTML = `Run: ${finetune_configs_and_runs.active[finetune_model].specific_lora_run_id}`; + lora_switch_checkpoint.innerHTML = `Checkpoint: ${finetune_configs_and_runs.active[finetune_model].specific_checkpoint}`; } else if (mode == 'latest-best') { lora_switch_run_id.style.display = 'block'; lora_switch_checkpoint.style.display = 'block'; @@ -301,8 +357,8 @@ function render_lora_switch() { } else { lora_switch_run_id.style.display = 'none'; lora_switch_checkpoint.style.display = 'none'; - lora_switch_run_id.innerHTML = `Run: ${finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name].specific_lora_run_id}`; - lora_switch_checkpoint.innerHTML = `Checkpoint: ${finetune_configs_and_runs.active[finetune_configs_and_runs.config.model_name].specific_checkpoint}`; + lora_switch_run_id.innerHTML = `Run: ${finetune_configs_and_runs.active[finetune_model].specific_lora_run_id}`; + lora_switch_checkpoint.innerHTML = `Checkpoint: ${finetune_configs_and_runs.active[finetune_model].specific_checkpoint}`; } } @@ -332,9 +388,22 @@ function render_checkpoints(data = []) { row.classList.add('table-success'); } const activate_cell = document.createElement('td'); - activate_cell.innerHTML = ``; + const download_cell = document.createElement('td'); + + activate_cell.innerHTML = ` + `; + download_cell.innerHTML = ` + + + `; + row.appendChild(activate_cell); + row.appendChild(download_cell); row.appendChild(cell); + checkpoints.appendChild(row); activate_cell.addEventListener('click', (event) => { if(!row.classList.contains('table-success')) { @@ -344,7 +413,10 @@ function 
render_checkpoints(data = []) { } row.classList.add('table-success'); } - finetune_switch_activate("specific", selected_lora, cell.dataset.checkpoint); + finetune_activate_run(selected_lora, cell.dataset.checkpoint); + }); + activate_cell.addEventListener('click', (event) => { + event.stopPropagation(); }); }); } @@ -359,12 +431,12 @@ function animate_use_model() { function finetune_switch_activate(lora_mode, run_id, checkpoint) { animate_use_model(); - const model_name = document.querySelector('#finetune-model').value + const finetune_model = finetune_configs_and_runs.completion_model.finetune; let send_this = { - "model": model_name, + "model": finetune_model, "lora_mode": lora_mode, - "specific_lora_run_id": run_id ? run_id : finetune_configs_and_runs.active[model_name].specific_lora_run_id, - "specific_checkpoint": checkpoint ? checkpoint : finetune_configs_and_runs.active[model_name].specific_checkpoint, + "specific_lora_run_id": run_id ? run_id : finetune_configs_and_runs.active[finetune_model].specific_lora_run_id, + "specific_checkpoint": checkpoint ? checkpoint : finetune_configs_and_runs.active[finetune_model].specific_checkpoint, } fetch("/tab-finetune-activate", { method: "POST", @@ -874,6 +946,8 @@ function start_log_stream(run_id) { }; fetchData(); } + + export async function init() { let req = await fetch('/tab-finetune.html'); document.querySelector('#finetune').innerHTML = await req.text(); @@ -991,12 +1065,21 @@ export async function init() { model_select_dropdown.addEventListener('change', function() { change_finetune_model(); }); + } export function tab_switched_here() { tab_finetune_get(); tab_finetune_config_and_runs(); render_schedule_dialog(); + init_upload_files_modal( + document.querySelector('#lora-upload-files-modal'), + document.querySelector('#finetune-upload-lora-open-modal'), + 'Upload Lora', + 'link', + '/lora-upload-url', '/lora-upload', + "Loading lora. This may take a few more minutes..." + ); } export function tab_switched_away() { @@ -1004,6 +1087,7 @@ export function tab_switched_away() { logs_streamer_to_stop.cancel(); logs_streamer_to_stop = undefined; } + upload_files_modal_switch_away(document.querySelector('#lora-upload-files-modal')); } export function tab_update_each_couple_of_seconds() { diff --git a/self_hosting_machinery/webgui/static/tab-model-hosting.html b/self_hosting_machinery/webgui/static/tab-model-hosting.html index 9a48bf9be..3a5c73c50 100644 --- a/self_hosting_machinery/webgui/static/tab-model-hosting.html +++ b/self_hosting_machinery/webgui/static/tab-model-hosting.html @@ -9,6 +9,7 @@

Hosted Models

Model Completion + Finetune Sharding Share GPU diff --git a/self_hosting_machinery/webgui/static/tab-model-hosting.js b/self_hosting_machinery/webgui/static/tab-model-hosting.js index dad449276..8536b651d 100644 --- a/self_hosting_machinery/webgui/static/tab-model-hosting.js +++ b/self_hosting_machinery/webgui/static/tab-model-hosting.js @@ -146,6 +146,7 @@ function render_models_assigned(models) { row.setAttribute('data-model',index); const model_name = document.createElement("td"); const completion = document.createElement("td"); + const finetune_info = document.createElement("td"); const select_gpus = document.createElement("td"); const gpus_share = document.createElement("td"); const del = document.createElement("td"); @@ -168,6 +169,21 @@ function render_models_assigned(models) { completion.appendChild(completion_input); } + if (models_info[index].hasOwnProperty('finetune_info') && models_info[index].finetune_info) { + finetune_info.innerHTML = ` + + + + + + + + + +
Run: ${models_info[index].finetune_info.run}
Checkpoint: ${models_info[index].finetune_info.checkpoint}
+ `; + } + if (models_info[index].hasOwnProperty('has_sharding') && models_info[index].has_sharding) { const select_gpus_div = document.createElement("div"); select_gpus_div.setAttribute("class", "btn-group btn-group-sm"); @@ -233,6 +249,7 @@ function render_models_assigned(models) { row.appendChild(model_name); row.appendChild(completion); + row.appendChild(finetune_info); row.appendChild(select_gpus); row.appendChild(gpus_share); row.appendChild(del); diff --git a/self_hosting_machinery/webgui/static/tab-upload.html b/self_hosting_machinery/webgui/static/tab-upload.html index 12dad8db6..c5f0e1ba7 100644 --- a/self_hosting_machinery/webgui/static/tab-upload.html +++ b/self_hosting_machinery/webgui/static/tab-upload.html @@ -1,4 +1,6 @@
+
+

Code and Text Sources

@@ -19,10 +21,7 @@

Code and Text Sources

- - +
@@ -76,56 +75,6 @@
Files for Vector DB
- - - - - -
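
Usage note: below is a minimal client sketch of the completion API after the changes in selfhost_fastapi_completions.py — the new /coding_assistant_caps.json route added for the LSP server, and /completions, which now resolves the model from the request body via static_resolve_model and returns text/event-stream only when stream is true. It assumes a locally running server on http://127.0.0.1:8008 with the /v1 prefix, as used by metrics/measure_humaneval_continue.py above; the prompt, model choice, and generation parameters are purely illustrative.

import requests

BASE_URL = "http://127.0.0.1:8008/v1"  # assumption: same host and prefix as the benchmark script above

# New capabilities endpoint for the LSP server: lists running models and the
# default completion model, plus tokenizer paths rewritten to each model_path.
caps = requests.get(f"{BASE_URL}/coding_assistant_caps.json").json()
model = caps["code_completion_default_model"]

# /completions now honors the "model" field from the request body; with
# stream=False the body is a single JSON object (as in the benchmark script),
# with stream=True it is served as text/event-stream.
resp = requests.post(f"{BASE_URL}/completions", json={
    "model": model,
    "prompt": "def fibonacci(n):",   # illustrative prompt
    "max_tokens": 50,
    "temperature": 0.2,
    "stream": False,
})
resp.raise_for_status()
print(resp.json()["choices"][0]["text"])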