mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 19:34:53 +00:00
Easier quantization.
This commit is contained in:
parent
e2d167256a
commit
4882de4d7a
@@ -61,7 +61,9 @@ def serve(
|
||||
if otlp_endpoint is not None:
|
||||
setup_tracing(shard=os.getenv("RANK", 0), otlp_endpoint=otlp_endpoint)
|
||||
|
||||
server.serve(model_id, revision, sharded, quantize.value, uds_path)
|
||||
# Downgrade enum into str for easier management later on
|
||||
quantize = None if quantize is None else quantize.value
|
||||
server.serve(model_id, revision, sharded, quantize, uds_path)
|
||||
|
||||
|
||||
@app.command()
|
||||
|
Loading…
Reference in New Issue
Block a user