Rebased and removed backend.

This commit is contained in:
Nicolas Patry 2023-08-09 16:08:51 +02:00
parent 25b6694da1
commit cd226354fb

View File

@ -32,7 +32,6 @@ def serve(
quantize: Optional[Quantization] = None, quantize: Optional[Quantization] = None,
dtype: Optional[Dtype] = None, dtype: Optional[Dtype] = None,
trust_remote_code: bool = False, trust_remote_code: bool = False,
backend: str = "cuda",
uds_path: Path = "/tmp/text-generation-server", uds_path: Path = "/tmp/text-generation-server",
logger_level: str = "INFO", logger_level: str = "INFO",
json_output: bool = False, json_output: bool = False,
@ -80,7 +79,7 @@ def serve(
"Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model." "Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model."
) )
server.serve( server.serve(
model_id, revision, sharded, quantize, dtype, backend, trust_remote_code, uds_path model_id, revision, sharded, quantize, dtype, trust_remote_code, uds_path
) )