chore: removed unused import.

This commit is contained in:
Nilabhra 2024-05-14 11:00:45 +04:00
parent 56ed686942
commit 011887f15c

View File

@ -18,25 +18,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import List, Optional, Tuple
+
 import torch
 import torch.distributed
 from torch import nn
 from transformers.activations import ACT2FN
-from typing import Optional, List, Tuple
-from text_generation_server.utils import paged_attention, flash_attn
 from text_generation_server.layers import (
-    TensorParallelRowLinear,
+    SpeculativeHead,
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
-    SpeculativeHead,
-    get_linear,
+    TensorParallelRowLinear,
 )
+from text_generation_server.layers.layernorm import FastRMSNorm
 from text_generation_server.layers.rotary import PositionRotaryEmbedding
-from text_generation_server.layers.layernorm import (
-    FastRMSNorm,
-)
+from text_generation_server.utils import flash_attn, paged_attention
 
 
 def load_attention(config, prefix, weights):