text-generation-inference/backends/gaudi/tgi-entrypoint.sh
yuanwu 638714f964 Add Qwen3
Signed-off-by: yuanwu <yuan.wu@intel.com>
2025-05-16 01:53:23 +00:00

20 lines
703 B
Bash

#!/bin/bash
# Entrypoint wrapper around text-generation-launcher.
#
# Steps, in order:
#   1. Refresh the dynamic linker cache (best effort).
#   2. Export PT_HPU_ENABLE_LAZY_COLLECTIVES=1 when sharding is requested on
#      the command line.
#   3. When ATTENTION=paged and the arguments mention Llama-4 or Qwen3,
#      install a pinned transformers revision (best effort).
#   4. Replace this shell with text-generation-launcher, forwarding every
#      argument unchanged.
#
# Environment:
#   ATTENTION - read; only the value "paged" triggers step 3.

# A failure here is tolerated on purpose: the message is informational only.
ldconfig 2>/dev/null || echo 'unable to refresh ld cache, not a big deal in most cases'

# Check if sharding is enabled in the command line arguments. Both the
# space-separated and the "=" spelling of the option are recognised.
if [[ "$*" == *"--sharded true"* || "$*" == *"--sharded=true"* ]]; then
  echo 'setting PT_HPU_ENABLE_LAZY_COLLECTIVES=1 for sharding'
  export PT_HPU_ENABLE_LAZY_COLLECTIVES=1
fi

# Check if ATTENTION environment variable is set to paged
if [[ "$ATTENTION" == "paged" ]]; then
  # Check if Llama-4 or Qwen3 appears anywhere in the command line arguments
  if [[ "$*" == *"Llama-4"* || "$*" == *"Qwen3"* ]]; then
    echo 'ATTENTION=paged and Llama-4 or Qwen3 detected'
    # Best effort: keep starting the launcher even if the install fails, but
    # make the failure visible on stderr instead of relying on pip's output.
    pip install git+https://github.com/huggingface/transformers.git@29338949 \
      || echo 'warning: failed to install pinned transformers revision, continuing' >&2
  fi
fi

# "$@" keeps each argument intact (no word splitting or glob expansion).
# exec replaces this shell so the launcher receives signals directly and its
# exit status becomes the exit status of the entrypoint.
exec text-generation-launcher "$@"