From 9b86418e2125c667ef60b69dd93a1e1c08da9fc5 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Tue, 2 Apr 2024 19:25:01 +0000
Subject: [PATCH] Fixing cohere tokenizer.

---
 server/text_generation_server/models/flash_cohere.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/flash_cohere.py b/server/text_generation_server/models/flash_cohere.py
index 33b053a6..181a93b1 100644
--- a/server/text_generation_server/models/flash_cohere.py
+++ b/server/text_generation_server/models/flash_cohere.py
@@ -3,7 +3,7 @@ import torch.distributed
 
 from opentelemetry import trace
 from typing import Optional
-from transformers.models.llama import LlamaTokenizerFast
+from transformers import AutoTokenizer
 
 from text_generation_server.models import FlashCausalLM
 from text_generation_server.models.custom_modeling.flash_cohere_modeling import (
@@ -36,7 +36,7 @@ class FlashCohere(FlashCausalLM):
         else:
             raise NotImplementedError("FlashCohere is only available on GPU")
 
-        tokenizer = LlamaTokenizerFast.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
             padding_side="left",
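Note: the change lets AutoTokenizer resolve the concrete tokenizer class from the
checkpoint's tokenizer_config.json instead of hard-coding LlamaTokenizerFast. A
minimal sketch of the resulting loading path is below; the model id
"CohereForAI/c4ai-command-r-v01" is an illustrative assumption, not taken from
the patch, and the snippet assumes access to the Hugging Face Hub.

    # Sketch: load a Cohere tokenizer the way the patched FlashCohere does.
    # The model id is a hypothetical example; the patch only switches the
    # tokenizer class, everything else (padding side, etc.) is unchanged.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(
        "CohereForAI/c4ai-command-r-v01",  # hypothetical example model id
        padding_side="left",
        truncation_side="left",
    )
    print(type(tokenizer).__name__)               # class resolved from the hub config
    print(tokenizer("Hello, Cohere!").input_ids)  # sanity-check tokenization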