Allow dtype for bitsandbytes (it works, checked for idefics

9b/llama/80b)
This commit is contained in:
Nicolas Patry 2023-11-28 14:15:56 +00:00
parent 2713b21132
commit e3c31c9d92
3 changed files with 6 additions and 2 deletions

View File

@ -210,6 +210,7 @@ def launcher(event_loop):
quantize: Optional[str] = None,
trust_remote_code: bool = False,
use_flash_attention: bool = True,
dtype: Optional[str] = None
):
port = random.randint(8000, 10_000)
master_port = random.randint(10_000, 20_000)
@ -237,6 +238,9 @@ def launcher(event_loop):
if quantize is not None:
args.append("--quantize")
args.append(quantize)
if dtype is not None:
args.append("--dtype")
args.append(dtype)
if trust_remote_code:
args.append("--trust-remote-code")

View File

@ -3,7 +3,7 @@ import pytest
@pytest.fixture(scope="module")
def idefics_handle(launcher):
with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2) as handle:
with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2, dtype="float16") as handle:
yield handle

View File

@ -76,7 +76,7 @@ def serve(
# Downgrade enum into str for easier management later on
quantize = None if quantize is None else quantize.value
dtype = None if dtype is None else dtype.value
if dtype is not None and quantize is not None:
if dtype is not None and quantize not in {None, "bitsandbytes", "bitsandbytes-nf4", "bitsandbytes-fp4"}:
raise RuntimeError(
"Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model."
)