Fixing eetq dockerfile.

2025-09-10 20:04:52 +00:00 · 2023-09-29 06:47:35 +00:00 · 2023-09-29 06:47:35 +00:00 · 59d77e5ea8
commit 59d77e5ea8
parent 724199aaf1
2 changed files with 11 additions and 0 deletions
--- a/9
+++ b/9
@ -123,6 +123,13 @@ COPY server/Makefile-awq Makefile
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq

+# Build eetq kernels in a dedicated stage derived from kernel-builder
+FROM kernel-builder as eetq-kernels-builder
+WORKDIR /usr/src
+COPY server/Makefile-eetq Makefile
+# Run the build-eetq target of the Makefile copied above
+RUN make build-eetq
+
 # Build Transformers CUDA kernels
 FROM kernel-builder as custom-kernels-builder
 WORKDIR /usr/src
@ -178,6 +185,8 @@ COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /o
 COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 # Copy build artifacts from awq kernels builder
 COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+# Copy build artifacts from eetq kernels builder
+COPY --from=eetq-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages

 # Copy builds artifacts from vllm builder
 COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
--- a/server/text_generation_server/models/init.py
+++ b/server/text_generation_server/models/init.py
@ -297,6 +297,8 @@ def get_model(
        raise ValueError("awq quantization is not supported for AutoModel")
    elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
        raise ValueError("4bit quantization is not supported for AutoModel")
+    elif (quantize == "eetq"):
+        raise ValueError("Eetq quantization is not supported for AutoModel")
    if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
        return CausalLM(
            model_id,