Felix Marty 2023-11-07 15:01:30 +00:00
parent ea8438a5a0
commit 2446928768
3 changed files with 0 additions and 5 deletions

View File

@@ -26,8 +26,6 @@ from transformers.activations import ACT2FN
 from transformers.configuration_utils import PretrainedConfig
 from typing import Optional, List, Tuple
-from loguru import logger
 from text_generation_server.utils import paged_attention, flash_attn
 from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
@@ -44,7 +42,6 @@ if IS_CUDA_SYSTEM:
 elif IS_ROCM_SYSTEM:
     from vllm import layernorm_ops
-torch.set_printoptions(threshold=10000000, sci_mode=True)
 class LlamaConfig(PretrainedConfig):
     def __init__(
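The second removed line above is a leftover debugging setting: torch.set_printoptions changes tensor printing for the entire process, not just the module it sits in. A minimal illustration of its effect (the tensor below is purely illustrative):

import torch

x = torch.rand(3, 3)

# Default print options: fixed-point formatting, large tensors summarized with "...".
print(x)

# The removed debug line: force scientific notation and raise the summarization
# threshold so even very large tensors print in full; this is a process-wide side effect.
torch.set_printoptions(threshold=10000000, sci_mode=True)
print(x)

# Restore the library defaults once debugging is done.
torch.set_printoptions(profile="default")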

View File

@@ -70,7 +70,6 @@ def attention(
     softmax_scale,
     window_size_left=-1,
 ):
-    # logger.info(f"HAS_FLASH_ATTN_V2 {HAS_FLASH_ATTN_V2}")
     if HAS_FLASH_ATTN_V2_CUDA:
         return flash_attn_2_cuda.varlen_fwd(
             q,
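The removed line here was a commented-out loguru debug statement. As a side note, loguru lets this kind of output stay in the code gated by level instead of being commented out; a tiny sketch (the flag value is a stand-in, not the real capability check):

from loguru import logger

HAS_FLASH_ATTN_V2 = True  # stand-in for the real capability flag

# Emitted as a DEBUG record; it can be silenced by configuring the sink's level,
# e.g. logger.add(sys.stderr, level="INFO"), rather than by commenting the line out.
logger.debug(f"HAS_FLASH_ATTN_V2 {HAS_FLASH_ATTN_V2}")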

View File

@@ -516,7 +516,6 @@ try:
     class FastLayerNorm(nn.LayerNorm):
         def forward(self, hidden_states, residual=None):
             if hidden_states.shape[-1] > 8192 or IS_ROCM_SYSTEM:
-                # Mistral does not use RMSNorm.
                 if residual is not None:
                     hidden_states += residual
                 residual = hidden_states
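For context, the surrounding branch is the non-fused fallback: when the hidden size exceeds 8192 or the system is ROCm, the residual is folded in with plain PyTorch ops before the standard nn.LayerNorm runs. A minimal sketch of that pattern, assuming the method returns the normalized output together with the updated residual (the return line is not shown in the hunk):

import torch
from torch import nn


class FallbackLayerNorm(nn.LayerNorm):
    # Sketch of the non-fused path: fold the residual into the activations,
    # keep the pre-norm sum as the next residual, then apply plain LayerNorm.
    def forward(self, hidden_states, residual=None):
        if residual is not None:
            hidden_states += residual
        residual = hidden_states
        return super().forward(hidden_states), residual


# Usage: the caller threads the returned residual back into the next call.
norm = FallbackLayerNorm(16)
out, res = norm(torch.randn(2, 16))
out, res = norm(out, residual=res)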