From 831a07f9900520bdee7cd1f694680b9cc84d29f2 Mon Sep 17 00:00:00 2001 From: David Holtz Date: Mon, 28 Oct 2024 16:33:07 +0000 Subject: [PATCH] fix: remove unused rotate_half --- .../models/custom_modeling/flash_qwen2_modeling.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py index f411c849..8c2c31d6 100644 --- a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py @@ -49,13 +49,6 @@ def _load_gqa(config, prefix: str, weights): ) -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - - class Qwen2Attention(torch.nn.Module): def __init__( self,