# text-generation-inference/server/text_generation_server/models/model.py

import torch
from abc import ABC, abstractmethod
from typing import List, Tuple, Optional, TypeVar, Type
from transformers import PreTrainedTokenizerBase
# 2023-03-07 17:52:22 +00:00
from text_generation_server.models.types import Batch, GeneratedText
# Type variable constrained to Batch (or a subclass of it); used below to tie
# a model's `batch_type` to the batch type accepted/returned by `generate_token`.
B = TypeVar("B", bound=Batch)
class Model(ABC):
    """Abstract base class for the models in ``text_generation_server.models``.

    Concrete subclasses must provide ``batch_type`` and ``generate_token``.
    ``decode_token`` is a shared helper built on the stored tokenizer.
    """

    def __init__(self, tokenizer: PreTrainedTokenizerBase, device: torch.device):
        """Store the tokenizer, its special token ids and the device.

        Args:
            tokenizer: Tokenizer used by ``decode_token``; its
                ``all_special_ids`` are copied into a set.
            device: Device associated with this model.
        """
        self.tokenizer = tokenizer
        # Kept as a set (rather than the tokenizer's own sequence) so that
        # membership tests on special ids are cheap.
        self.all_special_ids = set(tokenizer.all_special_ids)
        self.device = device

    @property
    @abstractmethod
    def batch_type(self) -> Type[B]:
        """Return the ``Batch`` subclass this model works with."""
        raise NotImplementedError

    @abstractmethod
    def generate_token(self, batch: B) -> Tuple[List[GeneratedText], Optional[B]]:
        """Run one generation step on ``batch``.

        Returns:
            A tuple of the ``GeneratedText`` items produced and an optional
            batch (presumably the batch to continue with, or ``None`` when
            nothing is left to generate -- confirm against implementations).
        """
        raise NotImplementedError

    def decode_token(self, previous_token_id: int, token_id: int) -> str:
        """Hack to hopefully support generate_stream for the maximum number of tokenizers

        The new token is decoded together with the token that precedes it and
        the text of the preceding token is then stripped off the front.

        Args:
            previous_token_id: Id of the token immediately before ``token_id``.
            token_id: Id of the token whose text is wanted.

        Returns:
            The part of the decoded pair that follows the decoded previous
            token.
        """
        # Decode previous token and previous token + token
        results = self.tokenizer.batch_decode(
            [[previous_token_id], [previous_token_id, token_id]],
            skip_special_tokens=False,
        )

        # Slice to remove previous token. This relies on the decoded pair
        # starting with exactly the text of the decoded previous token.
        return results[1][len(results[0]) :]