mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Fixing eetq dockerfile.
This commit is contained in:
parent
724199aaf1
commit
59d77e5ea8
@ -123,6 +123,13 @@ COPY server/Makefile-awq Makefile
|
|||||||
# Build specific version of awq
|
# Build specific version of awq
|
||||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
||||||
|
|
||||||
|
# Build eetq kernels
|
||||||
|
FROM kernel-builder as eetq-kernels-builder
|
||||||
|
WORKDIR /usr/src
|
||||||
|
COPY server/Makefile-eetq Makefile
|
||||||
|
# Build specific version of eetq
|
||||||
|
RUN make build-eetq
|
||||||
|
|
||||||
# Build Transformers CUDA kernels
|
# Build Transformers CUDA kernels
|
||||||
FROM kernel-builder as custom-kernels-builder
|
FROM kernel-builder as custom-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
@ -178,6 +185,8 @@ COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /o
|
|||||||
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
# Copy build artifacts from awq kernels builder
|
# Copy build artifacts from awq kernels builder
|
||||||
COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
|
# Copy build artifacts from eetq kernels builder
|
||||||
|
COPY --from=eetq-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
|
|
||||||
# Copy build artifacts from vllm builder
|
# Copy build artifacts from vllm builder
|
||||||
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||||
|
@ -297,6 +297,8 @@ def get_model(
|
|||||||
raise ValueError("awq quantization is not supported for AutoModel")
|
raise ValueError("awq quantization is not supported for AutoModel")
|
||||||
elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
|
elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
|
||||||
raise ValueError("4bit quantization is not supported for AutoModel")
|
raise ValueError("4bit quantization is not supported for AutoModel")
|
||||||
|
elif (quantize == "eetq"):
|
||||||
|
raise ValueError("Eetq quantization is not supported for AutoModel")
|
||||||
if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
|
if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
|
||||||
return CausalLM(
|
return CausalLM(
|
||||||
model_id,
|
model_id,
|
||||||
|
Loading…
Reference in New Issue
Block a user