diff --git a/docs/source/generate_examples.py b/docs/source/generate_examples.py
index c51ca18667ef..1206d5fe7539 100644
--- a/docs/source/generate_examples.py
+++ b/docs/source/generate_examples.py
@@ -14,13 +14,14 @@ def fix_case(text: str) -> str:
     subs = {
         "api": "API",
-        "Cli": "CLI",
+        "cli": "CLI",
         "cpu": "CPU",
         "llm": "LLM",
         "tpu": "TPU",
         "aqlm": "AQLM",
         "gguf": "GGUF",
         "lora": "LoRA",
+        "rlhf": "RLHF",
         "vllm": "vLLM",
         "openai": "OpenAI",
         "multilora": "MultiLoRA",
diff --git a/docs/source/index.md b/docs/source/index.md
index 3db79456a4e4..09ada43335c7 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -105,6 +105,7 @@ features/compatibility_matrix
 :maxdepth: 1
 
 training/trl.md
+training/rlhf.md
 
 :::
diff --git a/docs/source/training/rlhf.md b/docs/source/training/rlhf.md
new file mode 100644
index 000000000000..00822aefe11e
--- /dev/null
+++ b/docs/source/training/rlhf.md
@@ -0,0 +1,28 @@
+# Reinforcement Learning from Human Feedback
+
+Reinforcement Learning from Human Feedback (RLHF) is a technique that fine-tunes language models using human-generated preference data to align model outputs with desired behaviours.
+
+vLLM can be used to generate completions for RLHF. The best way to do this is with libraries like [TRL](https://github.com/huggingface/trl), [OpenRLHF](https://github.com/OpenRLHF/OpenRLHF), and [verl](https://github.com/volcengine/verl).
+
+If you don't want to use an existing library, see the following basic examples to get started:
+
+- [Training and inference processes are located on separate GPUs (inspired by OpenRLHF)](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf.html)
+- [Training and inference processes are colocated on the same GPUs using Ray](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_colocate.html)
+- [Utilities for performing RLHF with vLLM](https://docs.vllm.ai/en/latest/getting_started/examples/rlhf_utils.html)
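+
+If you just want to see the generation step that vLLM performs in an RLHF loop, the snippet below is a minimal, illustrative sketch of sampling candidate completions for a reward model to score. The model name, prompts, and sampling settings are placeholders; synchronising updated policy weights between the trainer and vLLM is covered by the examples above.
+
+```python
+from vllm import LLM, SamplingParams
+
+# Placeholder policy model and prompts; in a real RLHF loop these come from the trainer.
+llm = LLM(model="facebook/opt-125m")
+prompts = ["Explain why the sky is blue.", "Write a haiku about the ocean."]
+
+# Sample several candidate completions per prompt for the reward model to score.
+params = SamplingParams(n=4, temperature=0.8, top_p=0.95, max_tokens=128)
+
+for output in llm.generate(prompts, params):
+    completions = [c.text for c in output.outputs]
+    # Score `completions` with a reward model and feed the results back to the trainer.
+```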