fix: include create_exllama_buffers and set_device for exllama (#2407)

Author: drbh
Committed by: yuanwu
Date: 2024-08-12 17:59:37 -04:00
Parent: 1f8c0f83e3
Commit: 10b2be6536


@@ -422,12 +422,16 @@ elif CAN_EXLLAMA:
         if V2:
             from text_generation_server.layers.gptq.exllamav2 import (
                 QuantLinear as ExllamaQuantLinear,  # noqa: F401
+                create_exllama_buffers,  # noqa: F401
+                set_device,  # noqa: F401
             )
 
             HAS_EXLLAMA = "2"
         else:
             from text_generation_server.layers.gptq.exllama import (
                 Ex4bitLinear as ExllamaQuantLinear,  # noqa: F401
+                create_exllama_buffers,  # noqa: F401
+                set_device,  # noqa: F401
             )
 
             HAS_EXLLAMA = "1"
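For context, a minimal sketch of how the newly re-exported helpers might be consumed
at server warmup. The calling function, its module, and the exact signatures shown
below are assumptions for illustration and are not part of this commit; only the
names create_exllama_buffers, set_device, and HAS_EXLLAMA come from the diff above.

import torch

from text_generation_server.layers.gptq import (
    HAS_EXLLAMA,
    create_exllama_buffers,
    set_device,
)


def warmup_exllama(device: torch.device, max_total_tokens: int) -> None:
    # Assumed call pattern: point the exllama kernels at the right GPU,
    # then allocate their scratch buffers before serving requests.
    if HAS_EXLLAMA:
        set_device(device)
        create_exllama_buffers(max_total_tokens)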