diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index 233893fe..b12a9751 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -32,7 +32,6 @@ def serve( quantize: Optional[Quantization] = None, dtype: Optional[Dtype] = None, trust_remote_code: bool = False, - backend: str = "cuda", uds_path: Path = "/tmp/text-generation-server", logger_level: str = "INFO", json_output: bool = False, @@ -80,7 +79,7 @@ def serve( "Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model." ) server.serve( - model_id, revision, sharded, quantize, dtype, backend, trust_remote_code, uds_path + model_id, revision, sharded, quantize, dtype, trust_remote_code, uds_path )