mirror of https://github.com/huggingface/text-generation-inference.git
Fixing codellama loads by using purely AutoTokenizer.

- The need for the slow-tokenizer default stems from back when Llama 1 was introduced and not all the flags were supported in `tokenizers`.
- Fixes #1891
This commit is contained in:
parent f41d644a90
commit ad0b36bd28
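For context, the change collapses the old try/except tokenizer setup into a single fast-tokenizer load. A minimal sketch of the resulting behavior, assuming a CodeLlama checkpoint (the model id below is only illustrative and not part of the diff):

```python
from transformers import AutoTokenizer

# AutoTokenizer resolves the fast (Rust-backed) tokenizer for Llama/CodeLlama
# checkpoints and accepts the padding/truncation flags directly, so the old
# slow LlamaTokenizer fallback is no longer needed.
tokenizer = AutoTokenizer.from_pretrained(
    "codellama/CodeLlama-7b-hf",  # illustrative checkpoint, not from the diff
    padding_side="left",
    truncation_side="left",
)
print(tokenizer.is_fast)  # expected: True, the fast tokenizer supports these flags
```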
@@ -3,7 +3,6 @@ import torch.distributed
 
 from opentelemetry import trace
 from transformers import AutoConfig, AutoTokenizer, GenerationConfig
-from transformers.models.llama import LlamaTokenizer
 from typing import Optional
 
 from text_generation_server.models import FlashCausalLM
@@ -41,22 +40,13 @@ class FlashLlama(FlashCausalLM):
         else:
             raise NotImplementedError("FlashLlama is only available on GPU")
 
-        try:
-            tokenizer = LlamaTokenizer.from_pretrained(
-                model_id,
-                revision=revision,
-                padding_side="left",
-                truncation_side="left",
-                trust_remote_code=trust_remote_code,
-            )
-        except Exception:
-            tokenizer = AutoTokenizer.from_pretrained(
-                model_id,
-                revision=revision,
-                padding_side="left",
-                truncation_side="left",
-                trust_remote_code=trust_remote_code,
-            )
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
         try:
             generation_config = GenerationConfig.from_pretrained(
                 model_id, revision=revision, trust_remote_code=trust_remote_code