mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)

update vllm commit & fix models using sliding window

commit 0d3cc033ad, parent 2a48a10043
server/Makefile-vllm:

@@ -1,5 +1,5 @@
 commit_cuda := b5dfc61db88a81069e45b44f7cc99bd9e62a60fa
-commit_rocm := ca6913b3c2ffacdcb7d15e914dc34adbc6c89479
+commit_rocm := e5d1a20b3a8cb744ce562723f5e1dbe907ee18cc
 build-vllm-cuda:
 	if [ ! -d 'vllm' ]; then \
 		pip install -U ninja packaging --no-cache-dir && \
@@ -19,5 +19,5 @@ build-vllm-rocm:
 	PYTORCH_ROCM_ARCH="gfx90a;gfx942" python setup.py build

 install-vllm-rocm: build-vllm-rocm
-	cd vllm && git fetch && git checkout $(commit_rocm) && \
+	cd vllm && git fetch && git checkout $(commit_rocm) && \
 	PYTORCH_ROCM_ARCH="gfx90a;gfx942" pip install -e .
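For context, the Makefile pins vLLM to an exact source commit per backend and builds it from that checkout; the ROCm pin is what this commit updates. A minimal sketch of how one might verify that a local vllm checkout is actually on the pinned commit before building (the EXPECTED_ROCM_COMMIT constant and the "vllm" checkout path are assumptions for illustration, not part of the repo):

# Sketch: check that a local vllm checkout matches the pinned ROCm commit.
# EXPECTED_ROCM_COMMIT mirrors the new commit_rocm value from the Makefile;
# the "vllm" directory is an assumed location for the checkout.
import subprocess
import sys

EXPECTED_ROCM_COMMIT = "e5d1a20b3a8cb744ce562723f5e1dbe907ee18cc"

def current_commit(repo_path: str = "vllm") -> str:
    # `git rev-parse HEAD` prints the full SHA of the checked-out commit.
    out = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        cwd=repo_path,
        capture_output=True,
        text=True,
        check=True,
    )
    return out.stdout.strip()

if __name__ == "__main__":
    head = current_commit()
    if head != EXPECTED_ROCM_COMMIT:
        sys.exit(f"vllm checkout is at {head}, expected {EXPECTED_ROCM_COMMIT}")
    print("vllm checkout matches the pinned ROCm commit")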
server/text_generation_server/models/__init__.py:

@@ -412,9 +412,9 @@ def get_model(
     sliding_window = config_dict.get("sliding_window", -1)
     if sliding_window != -1 and not SUPPORTS_WINDOWING:
         logger.warning(
-            f"Flash attention is available, but doesn't support windowing which is required by model {model_id}"
+            f"Flash attention is available, but doesn't support windowing which is required by model {model_id} for best performance."
         )
-        FLASH_ATTENTION = False
+        # FLASH_ATTENTION = False

     if model_type == MAMBA:
         return Mamba(
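The behavioral change in this hunk: previously, a model whose config declares a sliding window on a build whose flash attention kernels lack windowing support would disable FLASH_ATTENTION entirely; after this commit, the server only logs a warning and keeps flash attention enabled. A minimal self-contained sketch of the new guard logic (SUPPORTS_WINDOWING, config_dict, and model_id are stand-in values for illustration, not the server's real state):

# Sketch of the sliding-window guard after this commit. SUPPORTS_WINDOWING,
# config_dict, and model_id below are stand-ins, not the server's real state.
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

SUPPORTS_WINDOWING = False               # e.g. kernels without windowing support
FLASH_ATTENTION = True

config_dict = {"sliding_window": 4096}   # Mistral-style config value (assumed)
model_id = "mistralai/Mistral-7B-v0.1"   # example model id (assumed)

sliding_window = config_dict.get("sliding_window", -1)
if sliding_window != -1 and not SUPPORTS_WINDOWING:
    logger.warning(
        f"Flash attention is available, but doesn't support windowing "
        f"which is required by model {model_id} for best performance."
    )
    # Before this commit, flash attention was disabled at this point:
    # FLASH_ATTENTION = False
    # Now the model still runs with flash attention, just without windowing.

print(f"FLASH_ATTENTION = {FLASH_ATTENTION}")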