mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Update docs/source/basic_tutorials/train_medusa.md
Updating to a value I think produces better results overall.
This commit is contained in:
parent
33043f8255
commit
ca0589f9e5
@@ -133,7 +133,7 @@ WANDB_MODE=offline torchrun --nproc_per_node=4 medusa/train/train_legacy.py \
|
||||
--data_path zephyr_self_distill.json \
|
||||
--bf16 True \
|
||||
--output_dir zephyr_out \
|
||||
-  --num_train_epochs 2 \
|
||||
+  --num_train_epochs 5 \
|
||||
--per_device_train_batch_size 4 \
|
||||
--per_device_eval_batch_size 4 \
|
||||
--gradient_accumulation_steps 4 \
|
||||
|
Loading…
Reference in New Issue
Block a user