
Commit 267bd47

Update training scripts of step 2 DPO in DeepSpeed-Chat.
1 parent 741f78b

17 files changed: +65 −326 lines changed

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 ### 💁For each folder, the bash scripts are examples of "facebook/opt" family.
 
 If you want to change your model such as EleutherAI/gpt-j-6b, you may simply replace
-`` --model_name_or_path facebook/opt-1.3b`` to ``--model_name_or_path EleutherAI/gpt-j-6b ``.
+`` --model_name_or_path facebook/opt-350m`` to ``--model_name_or_path EleutherAI/gpt-neo-125m ``.
 
 For the models we support, please see [our landing page](./../../../README.md#-supported-models-)
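
Concretely, the substitution described in that README change looks like the sketch below when applied to a step-2 DPO launch command from this commit; the flag set is trimmed for illustration and is an assumption, not a shipped script.

# Minimal sketch: the step-2 DPO launcher with the model swapped per the README note.
# All flags besides --model_name_or_path appear in the scripts changed by this commit.
deepspeed main.py \
    --model_name_or_path EleutherAI/gpt-neo-125m \
    --data_path Dahoas/rm-static \
    --data_split 2,4,4 \
    --zero_stage 0 \
    --deepspeed \
    --output_dir ./output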

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b.sh

Lines changed: 7 additions & 6 deletions

@@ -6,29 +6,30 @@
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output_step1_llama2_7b
+    OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
 fi
 if [ "$ZERO_STAGE" == "" ]; then
     ZERO_STAGE=3
 fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
+   --data_path Dahoas/rm-static \
    --data_split 2,4,4 \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
+   --per_device_train_batch_size 8 \
+   --per_device_eval_batch_size 8 \
    --max_seq_len 512 \
    --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 4 \
+   --weight_decay 0.1 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
    --seed 1234 \
    --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
+   --offload \
    --output_dir $OUTPUT \
    &> $OUTPUT/training.log
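
Since the script reads its output directory and ZeRO stage from positional arguments (OUTPUT=$1, ZERO_STAGE=$2), a typical invocation after this change is sketched below; the output path simply spells out the script's own default.

# Launch step-2 DPO for Llama-2-7B; omitting both arguments falls back to the defaults above.
bash training_scripts/llama2/run_llama2_7b.sh ./output_step2_llama_7b_epoch1_lr9.65e-6 3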

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/llama2/run_llama2_7b_lora.sh

Lines changed: 7 additions & 6 deletions

@@ -6,30 +6,31 @@
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
-    OUTPUT=./output_step1_llama2_7b_lora
+    OUTPUT=./output_step2_llama_7b_epoch1_lr9.65e-6
 fi
 if [ "$ZERO_STAGE" == "" ]; then
     ZERO_STAGE=3
 fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
-   --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
+   --data_path Dahoas/rm-static \
    --data_split 2,4,4 \
    --model_name_or_path meta-llama/Llama-2-7b-hf \
-   --per_device_train_batch_size 4 \
-   --per_device_eval_batch_size 4 \
+   --per_device_train_batch_size 8 \
+   --per_device_eval_batch_size 8 \
    --max_seq_len 512 \
    --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 4 \
+   --weight_decay 0.1 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
    --seed 1234 \
    --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
+   --offload \
    --lora_dim 128 \
    --lora_module_name "layers." \
    --output_dir $OUTPUT \
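
The LoRA variant follows the same argument convention; note from the diff that alongside the LoRA flags (--lora_dim 128, --lora_module_name "layers.") this commit also adds --offload, which in DeepSpeed-Chat turns on ZeRO CPU offloading to lower GPU memory pressure. A minimal invocation sketch:

# LoRA step-2 DPO run; positional args are the output dir and ZeRO stage, as above.
bash training_scripts/llama2/run_llama2_7b_lora.sh ./output_step2_llama_7b_epoch1_lr9.65e-6 3
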
Lines changed: 8 additions & 8 deletions

@@ -9,25 +9,25 @@ if [ "$OUTPUT" == "" ]; then
     OUTPUT=./output
 fi
 if [ "$ZERO_STAGE" == "" ]; then
-    ZERO_STAGE=3
+    ZERO_STAGE=0
 fi
 mkdir -p $OUTPUT
 
 deepspeed main.py \
    --data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
    --data_split 2,4,4 \
-   --model_name_or_path facebook/opt-6.7b \
-   --per_device_train_batch_size 6 \
-   --per_device_eval_batch_size 6 \
+   --model_name_or_path facebook/opt-350m \
+   --per_device_train_batch_size 2 \
+   --per_device_eval_batch_size 2 \
    --max_seq_len 512 \
-   --learning_rate 9.65e-6 \
-   --weight_decay 0. \
-   --num_train_epochs 16 \
+   --learning_rate 5e-5 \
+   --weight_decay 0.1 \
+   --dropout 0.0 \
+   --num_train_epochs 1 \
    --gradient_accumulation_steps 1 \
    --lr_scheduler_type cosine \
    --num_warmup_steps 0 \
    --seed 1234 \
-   --gradient_checkpointing \
    --zero_stage $ZERO_STAGE \
    --deepspeed \
    --output_dir $OUTPUT \
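
Worth noting in this file: the default drops from ZERO_STAGE=3 to ZERO_STAGE=0 (plain data parallelism, no ZeRO partitioning), which is reasonable for a 350M-parameter model, and a higher stage can still be passed as the second positional argument. A hedged sketch follows; the script path is an assumption, since the filename header for this diff was not captured.

# Override the new ZERO_STAGE=0 default (e.g. request ZeRO stage 2) via the second argument.
bash training_scripts/opt/single_node/run_350m.sh ./output 2   # path assumed, not from the diff
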
Lines changed: 2 additions & 4 deletions

@@ -3,8 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 # DeepSpeed Team
-
-# Note that usually LoRA needs to use larger learning rate
 OUTPUT=$1
 ZERO_STAGE=$2
 if [ "$OUTPUT" == "" ]; then
@@ -15,8 +13,8 @@ if [ "$ZERO_STAGE" == "" ]; then
 fi
 mkdir -p $OUTPUT
 
-deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-1.3b \
-   --gradient_accumulation_steps 8 --lora_dim 128 --zero_stage $ZERO_STAGE \
+deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-350m \
+   --weight_decay 0.1 --dropout 0.0 --gradient_accumulation_steps 4 --zero_stage $ZERO_STAGE \
    --enable_tensorboard \
    --tensorboard_path $OUTPUT \
    --deepspeed --output_dir $OUTPUT &> $OUTPUT/training.log
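
Because this single-GPU script enables TensorBoard logging (--enable_tensorboard with --tensorboard_path $OUTPUT) and redirects stdout/stderr to $OUTPUT/training.log, a run can be monitored with standard tools; ./output below stands in for whatever $OUTPUT resolves to.

tail -f ./output/training.log     # stream the log written by the redirect in the script
tensorboard --logdir ./output     # TensorBoard discovers event files recursively under this dir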

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_gpu/run_6.7b_lora.sh

Lines changed: 0 additions & 31 deletions
This file was deleted.

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b.sh

Lines changed: 0 additions & 35 deletions
This file was deleted.

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_1.3b_lora.sh

Lines changed: 0 additions & 31 deletions
This file was deleted.

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_13b.sh

Lines changed: 0 additions & 36 deletions
This file was deleted.

applications/DeepSpeed-Chat/training/step2_dpo_finetuning/training_scripts/opt/single_node/run_30b_lora.sh

Lines changed: 0 additions & 28 deletions
This file was deleted.
