diff --git a/README.md b/README.md index 58232c1c..0fa7a538 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Text Generation Inference (TGI) is a toolkit for deploying and serving Large Lan For a detailed starting guide, please see the [Quick Tour](https://huggingface.co/docs/text-generation-inference/quicktour). The easiest way of getting started is using the official Docker container: ```shell -model=tiiuae/falcon-7b-instruct +model=HuggingFaceH4/zephyr-7b-beta volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.1 --model-id $model diff --git a/server/text_generation_server/utils/peft.py b/server/text_generation_server/utils/peft.py index e37447dc..d37e8940 100644 --- a/server/text_generation_server/utils/peft.py +++ b/server/text_generation_server/utils/peft.py @@ -38,7 +38,7 @@ def download_and_unload_peft(model_id, revision, trust_remote_code): os.makedirs(model_id, exist_ok=True) cache_dir = model_id logger.info(f"Saving the newly created merged model to {cache_dir}") - tokenizer = AutoTokenizer.from_pretrained(base_model_id) + tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=trust_remote_code) model.save_pretrained(cache_dir, safe_serialization=True) model.config.save_pretrained(cache_dir) tokenizer.save_pretrained(cache_dir)