mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Set maximum gRPC message receive size to 2 GiB
The previous default was 4 MiB, which is too small for multi-modal models.
This commit is contained in:
parent
0f7d38e774
commit
991a1cbb3b
@@ -240,7 +240,11 @@ def serve(
|
||||
interceptors=[
|
||||
ExceptionInterceptor(),
|
||||
UDSOpenTelemetryAioServerInterceptor(),
|
||||
]
|
||||
],
|
||||
options=[
|
||||
# Set the maximum possible message length: i32::MAX
|
||||
("grpc.max_receive_message_length", (1 << 31) - 1)
|
||||
],
|
||||
)
|
||||
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
||||
TextGenerationService(model, Cache(), quantize, server_urls), server
|
||||
|
Loading…
Reference in New Issue
Block a user