Update docs/source/basic_tutorials/train_medusa.md

Updating to a value I think produces better results overall.
This commit is contained in:
Nicolas Patry 2024-05-23 11:33:51 +02:00 committed by GitHub
parent 33043f8255
commit ca0589f9e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -133,7 +133,7 @@ WANDB_MODE=offline torchrun --nproc_per_node=4 medusa/train/train_legacy.py \
--data_path zephyr_self_distill.json \ --data_path zephyr_self_distill.json \
--bf16 True \ --bf16 True \
--output_dir zephyr_out \ --output_dir zephyr_out \
--num_train_epochs 2 \ --num_train_epochs 5 \
--per_device_train_batch_size 4 \ --per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \ --per_device_eval_batch_size 4 \
--gradient_accumulation_steps 4 \ --gradient_accumulation_steps 4 \