mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
Upgrade to SynapseAI 1.19 (#259)
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
5291f652a1
commit
46b556805b
@ -41,7 +41,7 @@ COPY launcher launcher
|
|||||||
RUN cargo build --profile release-opt
|
RUN cargo build --profile release-opt
|
||||||
|
|
||||||
# Text Generation Inference base image
|
# Text Generation Inference base image
|
||||||
FROM vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest as base
|
FROM vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest AS base
|
||||||
|
|
||||||
ENV ATTENTION=default
|
ENV ATTENTION=default
|
||||||
ENV PREFIX_CACHING=0
|
ENV PREFIX_CACHING=0
|
||||||
@ -75,7 +75,7 @@ RUN cd server && \
|
|||||||
make gen-server && \
|
make gen-server && \
|
||||||
pip install --no-deps -r requirements.txt && \
|
pip install --no-deps -r requirements.txt && \
|
||||||
bash ./dill-0.3.8-patch.sh && \
|
bash ./dill-0.3.8-patch.sh && \
|
||||||
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.18.0 && \
|
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.19.0 && \
|
||||||
BUILD_CUDA_EXT=0 pip install git+https://github.com/AutoGPTQ/AutoGPTQ.git@097dd04e --no-build-isolation && \
|
BUILD_CUDA_EXT=0 pip install git+https://github.com/AutoGPTQ/AutoGPTQ.git@097dd04e --no-build-isolation && \
|
||||||
pip install . --no-cache-dir
|
pip install . --no-cache-dir
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ opentelemetry-instrumentation-grpc = "^0.36b0"
|
|||||||
hf-transfer = "^0.1.2"
|
hf-transfer = "^0.1.2"
|
||||||
sentencepiece = "^0.1.97"
|
sentencepiece = "^0.1.97"
|
||||||
peft = "^0.10"
|
peft = "^0.10"
|
||||||
optimum-habana = "1.14.1"
|
optimum-habana = "1.15.0"
|
||||||
transformers = "4.45.2"
|
transformers = "4.45.2"
|
||||||
numpy = "1.26.4"
|
numpy = "1.26.4"
|
||||||
accelerate = "0.33.0"
|
accelerate = "0.33.0"
|
||||||
|
@ -12,7 +12,7 @@ colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_p
|
|||||||
coloredlogs==15.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
coloredlogs==15.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
datasets==3.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
datasets==3.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
|
deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
diffusers==0.29.2 ; python_version >= "3.9" and python_version < "3.13"
|
diffusers==0.31.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
|
dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
filelock==3.16.1 ; python_version >= "3.9" and python_version < "3.13"
|
filelock==3.16.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "3.13"
|
frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
@ -46,7 +46,7 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
|
|||||||
opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
|
opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
optimum-habana==1.14.1 ; python_version >= "3.9" and python_version < "3.13"
|
optimum-habana==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
optimum==1.23.2 ; python_version >= "3.9" and python_version < "3.13"
|
optimum==1.23.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
|
packaging==24.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.13"
|
pandas==2.2.3 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
@ -67,7 +67,7 @@ requests==2.32.3 ; python_version >= "3.9" and python_version < "3.13"
|
|||||||
safetensors==0.4.5 ; python_version >= "3.9" and python_version < "3.13"
|
safetensors==0.4.5 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
scikit-learn==1.5.2 ; python_version >= "3.9" and python_version < "3.13"
|
scikit-learn==1.5.2 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
|
scipy==1.13.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
sentence-transformers[train]==3.0.1 ; python_version >= "3.9" and python_version < "3.13"
|
sentence-transformers[train]==3.2.1 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
|
sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13"
|
setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
|
six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
|
||||||
|
@ -717,11 +717,12 @@ class CausalLM(Model):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2", "falcon"]:
|
if model.config.model_type in ["llama", "mistral", "starcoder2", "qwen2", "falcon", "gpt_bigcode"]:
|
||||||
if model.config.model_type not in ["falcon"]:
|
if model.config.model_type not in ["falcon", "gpt_bigcode"]:
|
||||||
self.kwargs["attn_softmax_bf16"] = True
|
self.kwargs["attn_softmax_bf16"] = True
|
||||||
|
|
||||||
self.kwargs["trim_logits"] = True
|
if model.config.model_type not in ["gpt_bigcode"]:
|
||||||
|
self.kwargs["trim_logits"] = True
|
||||||
|
|
||||||
if os.getenv("USE_FLASH_ATTENTION", "false").lower() == "true":
|
if os.getenv("USE_FLASH_ATTENTION", "false").lower() == "true":
|
||||||
self.kwargs["use_flash_attention"] = True
|
self.kwargs["use_flash_attention"] = True
|
||||||
|
0
server/text_generation_server/pb/.gitignore
vendored
Normal file → Executable file
0
server/text_generation_server/pb/.gitignore
vendored
Normal file → Executable file
Loading…
Reference in New Issue
Block a user