diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py
index f1db6395..e58dab61 100644
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -39,7 +39,9 @@ class FlashLlama(FlashCausalLM):
             raise NotImplementedError("FlashLlama does not support quantization")
 
         tokenizer = LlamaTokenizer.from_pretrained(
-            model_id, revision=revision, padding_side="left",
+            model_id,
+            revision=revision,
+            padding_side="left",
         )
 
         config = AutoConfig.from_pretrained(
@@ -155,7 +157,9 @@ class FlashLlamaSharded(FlashLlama):
             raise NotImplementedError("FlashLlama does not support quantization")
 
         tokenizer = LlamaTokenizer.from_pretrained(
-            model_id, revision=revision, padding_side="left",
+            model_id,
+            revision=revision,
+            padding_side="left",
         )
 
         config = AutoConfig.from_pretrained(