mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
fix: replace LlamaTokenizerFast with AutoTokenizer in flash_llama.py
This commit is contained in:
parent
a2cf1bdb2f
commit
abe4e4b1cc
@@ -2,8 +2,8 @@ import torch
|
|||||||
import torch.distributed
|
import torch.distributed
|
||||||
|
|
||||||
from opentelemetry import trace
|
from opentelemetry import trace
|
||||||
from transformers import AutoConfig
|
from transformers import AutoConfig, AutoTokenizer
|
||||||
from transformers.models.llama import LlamaTokenizer, LlamaTokenizerFast
|
from transformers.models.llama import LlamaTokenizer
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from text_generation_server.models import FlashCausalLM
|
from text_generation_server.models import FlashCausalLM
|
||||||
@@ -44,7 +44,7 @@ class FlashLlama(FlashCausalLM):
|
|||||||
trust_remote_code=trust_remote_code,
|
trust_remote_code=trust_remote_code,
|
||||||
)
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
tokenizer = LlamaTokenizerFast.from_pretrained(
|
tokenizer = AutoTokenizer.from_pretrained(
|
||||||
model_id,
|
model_id,
|
||||||
revision=revision,
|
revision=revision,
|
||||||
padding_side="left",
|
padding_side="left",
|
||||||
|
Loading…
Reference in New Issue
Block a user