mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
cleaning
This commit is contained in:
parent ea8438a5a0
commit 2446928768
@@ -26,8 +26,6 @@ from transformers.activations import ACT2FN
 from transformers.configuration_utils import PretrainedConfig
 from typing import Optional, List, Tuple
 
-from loguru import logger
-
 from text_generation_server.utils import paged_attention, flash_attn
 from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
@@ -44,7 +42,6 @@ if IS_CUDA_SYSTEM:
 elif IS_ROCM_SYSTEM:
     from vllm import layernorm_ops
 
-torch.set_printoptions(threshold=10000000, sci_mode=True)
 
 class LlamaConfig(PretrainedConfig):
     def __init__(
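The line removed in this hunk, `torch.set_printoptions(threshold=10000000, sci_mode=True)`, globally switches tensor printing to full, scientific-notation output: handy while debugging, noisy to leave in library code. A minimal sketch of the pattern, using only the documented `torch.set_printoptions` API (the `profile="default"` reset is one way to undo it):

```python
import torch

x = torch.randn(4, 4096)

# While debugging, print every element instead of a truncated summary,
# and force scientific notation so tiny values stay visible.
torch.set_printoptions(threshold=10_000_000, sci_mode=True)
print(x)

# Restore the stock settings once done, so later output stays readable.
torch.set_printoptions(profile="default")
```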
@@ -70,7 +70,6 @@ def attention(
     softmax_scale,
     window_size_left=-1,
 ):
-    # logger.info(f"HAS_FLASH_ATTN_V2 {HAS_FLASH_ATTN_V2}")
     if HAS_FLASH_ATTN_V2_CUDA:
         return flash_attn_2_cuda.varlen_fwd(
             q,
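`HAS_FLASH_ATTN_V2_CUDA` is a capability flag set once at import time by probing for the flash-attn v2 CUDA extension; the commented-out `logger.info` that dumped it on every call is what gets dropped here. The exact `varlen_fwd` signature varies across flash-attn releases, so the sketch below shows only the import-probe idiom plus a plain PyTorch reference fallback (`attention_reference` is a hypothetical name, not TGI's API):

```python
import torch

try:
    # Probe for the fused kernel once at import time and record the
    # result as a flag, rather than logging it on every forward call.
    import flash_attn_2_cuda  # noqa: F401

    HAS_FLASH_ATTN_V2_CUDA = torch.cuda.is_available()
except ImportError:
    HAS_FLASH_ATTN_V2_CUDA = False


def attention_reference(q, k, v, softmax_scale):
    # Unfused fallback: softmax(q @ k^T * scale) @ v.
    scores = torch.matmul(q, k.transpose(-2, -1)) * softmax_scale
    return torch.matmul(torch.softmax(scores, dim=-1), v)
```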
@@ -516,7 +516,6 @@ try:
     class FastLayerNorm(nn.LayerNorm):
         def forward(self, hidden_states, residual=None):
             if hidden_states.shape[-1] > 8192 or IS_ROCM_SYSTEM:
-                # Mistral does not use RMSNorm.
                 if residual is not None:
                     hidden_states += residual
                 residual = hidden_states
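For wide hidden states (over 8192) or on ROCm, `FastLayerNorm` cannot use the fused kernel, so it folds the skip connection in eagerly, keeps the summed activations as the next residual, and defers to stock `nn.LayerNorm`. A minimal sketch of that fallback branch (the class name is hypothetical):

```python
import torch
from torch import nn


class SlowPathLayerNorm(nn.LayerNorm):
    """Hypothetical stand-in for FastLayerNorm's non-fused branch."""

    def forward(self, hidden_states, residual=None):
        # Fold the skip connection in before normalizing, and hand the
        # summed activations back so the caller can reuse them as the
        # residual input of the next layer.
        if residual is not None:
            hidden_states = hidden_states + residual
        residual = hidden_states
        return super().forward(hidden_states), residual


ln = SlowPathLayerNorm(16)
out, res = ln(torch.randn(2, 16), residual=torch.randn(2, 16))
```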