Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-24 00:12:08 +00:00.
- Avoid theoretical hang in batcher loop
- Avoid a couple of clones in the router generate method
- Keep attention mask tensors as integers
- Remove num_heads attribute

Co-authored-by: OlivierDehaene <Olivier.dehaene@gmail.com>
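For the attention-mask item, a minimal PyTorch sketch of what keeping the mask as an integer tensor looks like; the tensors and the padding id below are illustrative, not taken from the repository:

import torch

# Padding mask as an integer tensor: 1 marks a real token, 0 marks padding.
# This matches the int64 `attention_mask` that Hugging Face tokenizers emit
# and avoids storing a float copy in the batch state.
input_ids = torch.tensor([[101, 2023, 2003, 102, 0, 0]])
attention_mask = (input_ids != 0).long()  # dtype: torch.int64

# Code that needs a float mask can cast at the point of use instead.
extended_mask = attention_mask[:, None, None, :].to(torch.float32)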
25 lines · 629 B · Python
from abc import ABC, abstractmethod
from typing import List, Optional, Tuple, Type, TypeVar

import torch
from tokenizers import Tokenizer

from text_generation.models.types import Batch, GeneratedText

# Each model works on a specific Batch subclass.
B = TypeVar("B", bound=Batch)


class Model(ABC):
    """Abstract base class for all text-generation models."""

    def __init__(self, tokenizer: Tokenizer, device: torch.device):
        self.tokenizer = tokenizer
        self.device = device

    @property
    @abstractmethod
    def batch_type(self) -> Type[B]:
        """The concrete Batch subclass this model consumes."""
        raise NotImplementedError

    @abstractmethod
    def generate_token(self, batch: B) -> Tuple[List[GeneratedText], Optional[B]]:
        """Run one decoding step: return the generations that finished and
        the remaining batch, or None when every request is done."""
        raise NotImplementedError
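To make the contract above concrete, here is a hypothetical minimal subclass. DummyModel, its trivial generate_token body, the usage lines, and the assumed module path text_generation.models.model are illustrative sketches, not code from the repository:

from typing import List, Optional, Tuple, Type

import torch
from tokenizers import Tokenizer

# Assumes the file above lives at text_generation/models/model.py.
from text_generation.models.model import Model
from text_generation.models.types import Batch, GeneratedText


class DummyModel(Model):
    """Hypothetical Model subclass illustrating the interface contract."""

    @property
    def batch_type(self) -> Type[Batch]:
        # Tells the serving layer which Batch class to build requests with.
        return Batch

    def generate_token(self, batch: Batch) -> Tuple[List[GeneratedText], Optional[Batch]]:
        # One decoding step. A real model would run a forward pass, sample a
        # token per request, and return (a) the requests that just finished
        # and (b) an updated batch for the rest, or None when all are done.
        return [], None


tokenizer = Tokenizer.from_pretrained("gpt2")
model = DummyModel(tokenizer, torch.device("cpu"))

Returning Optional[B] from generate_token lets the caller drop a fully finished batch outright instead of carrying an empty one through the loop.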