From ca0589f9e575678ecdd02fe014a236455e17826b Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 23 May 2024 11:33:51 +0200 Subject: [PATCH] Update docs/source/basic_tutorials/train_medusa.md Updating to a value I think produces better results overall. --- docs/source/basic_tutorials/train_medusa.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/basic_tutorials/train_medusa.md b/docs/source/basic_tutorials/train_medusa.md index 01b3f366..76cb6bed 100644 --- a/docs/source/basic_tutorials/train_medusa.md +++ b/docs/source/basic_tutorials/train_medusa.md @@ -133,7 +133,7 @@ WANDB_MODE=offline torchrun --nproc_per_node=4 medusa/train/train_legacy.py \ --data_path zephyr_self_distill.json \ --bf16 True \ --output_dir zephyr_out \ - --num_train_epochs 2 \ + --num_train_epochs 5 \ --per_device_train_batch_size 4 \ --per_device_eval_batch_size 4 \ --gradient_accumulation_steps 4 \