From 0777749dd38972c8eac1160b4b7f56349d0d1ed7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Danie=CC=88l=20de=20Kok?=
Date: Wed, 5 Jun 2024 08:28:29 +0000
Subject: [PATCH] Do not initialize scratch space when there are no ExLlamav2 layers

---
 server/text_generation_server/layers/gptq/exllamav2.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/server/text_generation_server/layers/gptq/exllamav2.py b/server/text_generation_server/layers/gptq/exllamav2.py
index 16a3eb89..4d45822b 100644
--- a/server/text_generation_server/layers/gptq/exllamav2.py
+++ b/server/text_generation_server/layers/gptq/exllamav2.py
@@ -145,6 +145,11 @@ def set_device(device):
 def create_exllama_buffers(max_total_tokens: int):
     global LAYERS, DEVICE
 
+    # No need to initialize scratch space if there are no layers
+    # that use ExLLamav2.
+    if len(LAYERS) == 0:
+        return
+
     # Find the size of the scratch space.
     scratch_bytes = max(
         layer.scratch_space_fixed(max_input_len=max_total_tokens, max_batch_size=1)