mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Update server/text_generation_server/layers/attention/xpu.py
This commit is contained in:
parent
d44688b6ac
commit
b0c168d249
@@ -14,9 +14,6 @@ def attention(
|
|||||||
softmax_scale,
|
softmax_scale,
|
||||||
window_size_left=-1,
|
window_size_left=-1,
|
||||||
):
|
):
|
||||||
if window_size_left <= 0 and window_size_left != -1:
|
|
||||||
raise ValueError("`window_size_left` must be > 0 or -1")
|
|
||||||
|
|
||||||
if window_size_left != -1:
|
if window_size_left != -1:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"XPU version of Flash Attention does not support window attention (window_size_left != -1, got window_size_left={window_size_left})."
|
f"XPU version of Flash Attention does not support window attention (window_size_left != -1, got window_size_left={window_size_left})."
|
||||||
|
Loading…
Reference in New Issue
Block a user