diff --git a/server/text_generation_server/layers/attention/rocm.py b/server/text_generation_server/layers/attention/rocm.py index d65054a1..a401b589 100644 --- a/server/text_generation_server/layers/attention/rocm.py +++ b/server/text_generation_server/layers/attention/rocm.py @@ -179,7 +179,7 @@ def paged_attention( 1.0, 1.0, None, - 512, + _PARTITION_SIZE, ) return out