mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Allow dtype for bitsandbytes (it works, checked for idefics
9b/llama/80b)
This commit is contained in:
parent
2713b21132
commit
e3c31c9d92
@ -210,6 +210,7 @@ def launcher(event_loop):
|
|||||||
quantize: Optional[str] = None,
|
quantize: Optional[str] = None,
|
||||||
trust_remote_code: bool = False,
|
trust_remote_code: bool = False,
|
||||||
use_flash_attention: bool = True,
|
use_flash_attention: bool = True,
|
||||||
|
dtype: Optional[str] = None
|
||||||
):
|
):
|
||||||
port = random.randint(8000, 10_000)
|
port = random.randint(8000, 10_000)
|
||||||
master_port = random.randint(10_000, 20_000)
|
master_port = random.randint(10_000, 20_000)
|
||||||
@ -237,6 +238,9 @@ def launcher(event_loop):
|
|||||||
if quantize is not None:
|
if quantize is not None:
|
||||||
args.append("--quantize")
|
args.append("--quantize")
|
||||||
args.append(quantize)
|
args.append(quantize)
|
||||||
|
if dtype is not None:
|
||||||
|
args.append("--dtype")
|
||||||
|
args.append(dtype)
|
||||||
if trust_remote_code:
|
if trust_remote_code:
|
||||||
args.append("--trust-remote-code")
|
args.append("--trust-remote-code")
|
||||||
|
|
||||||
|
@ -3,7 +3,7 @@ import pytest
|
|||||||
|
|
||||||
@pytest.fixture(scope="module")
|
@pytest.fixture(scope="module")
|
||||||
def idefics_handle(launcher):
|
def idefics_handle(launcher):
|
||||||
with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2) as handle:
|
with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2, dtype="float16") as handle:
|
||||||
yield handle
|
yield handle
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ def serve(
|
|||||||
# Downgrade enum into str for easier management later on
|
# Downgrade enum into str for easier management later on
|
||||||
quantize = None if quantize is None else quantize.value
|
quantize = None if quantize is None else quantize.value
|
||||||
dtype = None if dtype is None else dtype.value
|
dtype = None if dtype is None else dtype.value
|
||||||
if dtype is not None and quantize is not None:
|
if dtype is not None and quantize not in {None, "bitsandbytes", "bitsandbytes-nf4", "bitsandbytes-fp4"}:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model."
|
"Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model."
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user