mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Update server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
This commit is contained in:
parent
1b7b91a4d3
commit
f5e8f73a1c
@ -81,8 +81,8 @@ def _load_multi_mqa_gptq(
|
|||||||
try:
|
try:
|
||||||
import os
|
import os
|
||||||
|
|
||||||
bits = int(os.getenv("GTPQ_BITS"))
|
bits = int(os.getenv("GPTQ_BITS"))
|
||||||
groupsize = int(os.getenv("GTPQ_GROUPSIZE"))
|
groupsize = int(os.getenv("GPTQ_GROUPSIZE"))
|
||||||
except Exception:
|
except Exception:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user