Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-09 19:34:53 +00:00
Easier quantization.

parent e2d167256a
commit 4882de4d7a
@@ -61,7 +61,9 @@ def serve(
     if otlp_endpoint is not None:
         setup_tracing(shard=os.getenv("RANK", 0), otlp_endpoint=otlp_endpoint)
 
-    server.serve(model_id, revision, sharded, quantize.value, uds_path)
+    # Downgrade enum into str for easier management later on
+    quantize = None if quantize is None else quantize.value
+    server.serve(model_id, revision, sharded, quantize, uds_path)
 
 
 @app.command()
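
For context, a minimal sketch of the enum-to-str downgrade this commit performs. The Quantization enum name and its members below are assumptions for illustration, not taken from this diff; the change simply unwraps the enum to its str value while preserving None, so downstream code only deals with Optional[str].

    from enum import Enum
    from typing import Optional


    class Quantization(str, Enum):
        # Hypothetical members for illustration; the real CLI may define others.
        bitsandbytes = "bitsandbytes"
        gptq = "gptq"


    def downgrade(quantize: Optional[Quantization]) -> Optional[str]:
        # Downgrade the enum into its plain str value, keeping None as None,
        # mirroring the line added in this commit.
        return None if quantize is None else quantize.value


    print(downgrade(Quantization.bitsandbytes))  # -> "bitsandbytes"
    print(downgrade(None))                       # -> None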