Felix Marty 2023-11-07 15:01:30 +00:00
parent ea8438a5a0
commit 2446928768
3 changed files with 0 additions and 5 deletions

View File

@@ -26,8 +26,6 @@ from transformers.activations import ACT2FN
from transformers.configuration_utils import PretrainedConfig
from typing import Optional, List, Tuple
-from loguru import logger
from text_generation_server.utils import paged_attention, flash_attn
from text_generation_server.utils.layers import (
TensorParallelRowLinear,
@@ -44,7 +42,6 @@ if IS_CUDA_SYSTEM:
elif IS_ROCM_SYSTEM:
from vllm import layernorm_ops
-torch.set_printoptions(threshold=10000000, sci_mode=True)
class LlamaConfig(PretrainedConfig):
def __init__(

View File

@@ -70,7 +70,6 @@ def attention(
softmax_scale,
window_size_left=-1,
):
-# logger.info(f"HAS_FLASH_ATTN_V2 {HAS_FLASH_ATTN_V2}")
if HAS_FLASH_ATTN_V2_CUDA:
return flash_attn_2_cuda.varlen_fwd(
q,

View File

@@ -516,7 +516,6 @@ try:
class FastLayerNorm(nn.LayerNorm):
def forward(self, hidden_states, residual=None):
if hidden_states.shape[-1] > 8192 or IS_ROCM_SYSTEM:
-# Mistral does not use RMSNorm.
if residual is not None:
hidden_states += residual
residual = hidden_states