From 991a1cbb3b9e1c2def06f5d01fc1447463a3824a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?= Date: Mon, 17 Jun 2024 12:26:31 +0200 Subject: [PATCH] Set maximum grpc message receive size to 2GiB The previous default was 4MiB, which doesn't really work well for multi-modal models. --- server/text_generation_server/server.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py index 569b6925..a0347cd8 100644 --- a/server/text_generation_server/server.py +++ b/server/text_generation_server/server.py @@ -240,7 +240,11 @@ def serve( interceptors=[ ExceptionInterceptor(), UDSOpenTelemetryAioServerInterceptor(), - ] + ], + options=[ + # Set the maximum possible message length: i32::MAX + ("grpc.max_receive_message_length", (1 << 31) - 1) + ], ) generate_pb2_grpc.add_TextGenerationServiceServicer_to_server( TextGenerationService(model, Cache(), quantize, server_urls), server