add requirements to docker

This commit is contained in:
michaelfeil 2023-07-22 23:42:30 +02:00
parent 3f2fce87e7
commit 7338e0097f
4 changed files with 16 additions and 8 deletions

View File

@@ -188,7 +188,7 @@ COPY server/Makefile server/Makefile
 RUN cd server && \
     make gen-server && \
     pip install -r requirements.txt && \
-    pip install ".[bnb, accelerate]" --no-cache-dir
+    pip install ".[bnb, accelerate, ct2]" --no-cache-dir

 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

View File

@@ -17,6 +17,7 @@ grpc-interceptor = "^0.15.0"
 typer = "^0.6.1"
 accelerate = { version = "^0.19.0", optional = true }
 bitsandbytes = { version = "^0.38.1", optional = true }
+ctranslate2 = { version = "^3.17.1", optional = true }
 safetensors = "0.3.1"
 loguru = "^0.6.0"
 opentelemetry-api = "^1.15.0"
@@ -32,6 +33,7 @@ einops = "^0.6.1"
 [tool.poetry.extras]
 accelerate = ["accelerate"]
 bnb = ["bitsandbytes"]
+ct2 = ["ctranslate2"]

 [tool.poetry.group.dev.dependencies]
 grpcio-tools = "^1.51.1"

View File

@@ -89,6 +89,7 @@ def download_weights(
     auto_convert: bool = True,
     logger_level: str = "INFO",
     json_output: bool = False,
+    trust_remote_code: bool = False
 ):
     # Remove default handler
     logger.remove()
@@ -168,6 +169,7 @@ def download_weights(
         config = AutoConfig.from_pretrained(
             model_id,
             revision=revision,
+            trust_remote_code=trust_remote_code
         )
         architecture = config.architectures[0]

View File

@@ -2,7 +2,9 @@ import torch
 import inspect
 import numpy as np
 import os
+from pathlib import Path
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from dataclasses import dataclass
 from opentelemetry import trace
 from transformers import (
@@ -75,8 +77,9 @@ class CT2CausalLM(Model):
         # " sampling based / non-greedy next_token"
         # " of code only working in float16.")
         # Start CT2 - conversion
-        out_dir = f"./ct2-{model_id.replace('/','_')}-{ct2_compute_type}"
-        if not os.path.exists(os.path.join(out_dir, "model.bin")):
+        out_dir = Path(HUGGINGFACE_HUB_CACHE) / \
+            f"ct2models-{model_id.replace('/','--')}--{ct2_compute_type}"
+        if not os.path.exists(out_dir / "model.bin"):
             ex = ""
             try:
                 converter = ctranslate2.converters.TransformersConverter(
@@ -95,9 +98,9 @@ class CT2CausalLM(Model):
                 )
             except Exception as ex:
                 pass
-            if not os.path.exists(os.path.join(out_dir, "model.bin")) or ex:
+            if not os.path.exists(out_dir / "model.bin") or ex:
                 raise ValueError(
-                    f"conversion for {model_id} failed with ctranslate2: Error {ex}"
+                    f"conversion with ctranslate2 for {model_id} failed : Error {ex}"
                 )

         # Start CT2
@@ -108,10 +111,11 @@ class CT2CausalLM(Model):
         class DummyModel(torch.nn.Module):
             def __init__(self, *args, **kwargs) -> None:
                 super().__init__(*args, **kwargs)
-                self.config = AutoConfig.from_pretrained(model_id, revision=revision)
+                self.config = AutoConfig.from_pretrained(
+                    model_id, revision=revision,
+                    trust_remote_code=trust_remote_code)

         model = DummyModel()
-        self.vocab_size = model.config.vocab_size

         if tokenizer.pad_token_id is None:
             if model.config.pad_token_id is not None:
@@ -165,7 +169,7 @@ class CT2CausalLM(Model):
             # sampling_temperature=0,
             # )
             # # create fake logits from greedy token
-            # logits = torch.full((len(tokens_in), 1, self.vocab_size), -10, dtype=torch.float16, device="cuda")
+            # logits = torch.full((len(tokens_in), 1, self.model.config.vocab_size), -10, dtype=torch.float16, device="cuda")
             # for i, seq in enumerate(ids):
             #     token = seq.sequences_ids[0]
             #     logits[i, 0, token] = 10