From 2111ae1bd22bd6c9c405f2305559987f87014ae9 Mon Sep 17 00:00:00 2001
From: SeongBeomLEE <2712qwer@gmail.com>
Date: Mon, 11 Mar 2024 13:27:09 +0900
Subject: [PATCH] fix: LlamaTokenizerFast to AutoTokenizer at flash_mistral.py

---
 .../models/flash_mistral.py | 25 +++++++++++++------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/server/text_generation_server/models/flash_mistral.py b/server/text_generation_server/models/flash_mistral.py
index 8149c1b0..2e1055b2 100644
--- a/server/text_generation_server/models/flash_mistral.py
+++ b/server/text_generation_server/models/flash_mistral.py
@@ -6,7 +6,7 @@ import numpy as np
 
 from dataclasses import dataclass
 from opentelemetry import trace
-from transformers import PreTrainedTokenizerBase
+from transformers import PreTrainedTokenizerBase, AutoTokenizer
 from transformers.models.llama import LlamaTokenizerFast
 from typing import Optional, Tuple, Type
 
@@ -317,13 +317,22 @@ class BaseFlashMistral(FlashCausalLM):
         else:
             raise NotImplementedError("FlashMistral is only available on GPU")
 
-        tokenizer = LlamaTokenizerFast.from_pretrained(
-            model_id,
-            revision=revision,
-            padding_side="left",
-            truncation_side="left",
-            trust_remote_code=trust_remote_code,
-        )
+        try:
+            tokenizer = LlamaTokenizerFast.from_pretrained(
+                model_id,
+                revision=revision,
+                padding_side="left",
+                truncation_side="left",
+                trust_remote_code=trust_remote_code,
+            )
+        except Exception:
+            tokenizer = AutoTokenizer.from_pretrained(
+                model_id,
+                revision=revision,
+                padding_side="left",
+                truncation_side="left",
+                trust_remote_code=trust_remote_code,
+            )
 
         config = config_cls.from_pretrained(
             model_id, revision=revision, trust_remote_code=trust_remote_code
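
A quick illustration of the fallback this patch introduces: LlamaTokenizerFast
is tried first, and AutoTokenizer takes over when the checkpoint's tokenizer
is not Llama-compatible, typically resolving the right tokenizer class from
the checkpoint's own tokenizer configuration. The sketch below is a minimal
standalone version of that pattern; the load_tokenizer helper and its
signature are illustrative and do not appear in flash_mistral.py.

    from typing import Optional

    from transformers import AutoTokenizer
    from transformers.models.llama import LlamaTokenizerFast

    def load_tokenizer(
        model_id: str,
        revision: Optional[str] = None,
        trust_remote_code: bool = False,
    ):
        # Keyword arguments shared by both loaders, mirroring the patch.
        kwargs = dict(
            revision=revision,
            padding_side="left",
            truncation_side="left",
            trust_remote_code=trust_remote_code,
        )
        try:
            # Fast path: checkpoints with a Llama-compatible tokenizer.
            return LlamaTokenizerFast.from_pretrained(model_id, **kwargs)
        except Exception:
            # Fallback: let AutoTokenizer pick the tokenizer class from
            # the checkpoint's own configuration.
            return AutoTokenizer.from_pretrained(model_id, **kwargs)

Catching a broad Exception, as the patch does, is a deliberate choice here:
from_pretrained can fail in several ways for a non-Llama tokenizer, and any
such failure should route to the AutoTokenizer fallback.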