Update server/text_generation_server/models/custom_modeling/flash_llama_modeling.py

Co-authored-by: OlivierDehaene <olivier@huggingface.co>
This commit is contained in:
fxmarty 2023-11-08 19:06:59 +09:00 committed by GitHub
parent 0f7b9bba33
commit f746b8e0ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -160,6 +160,8 @@ class LlamaRMSNorm(nn.Module):
                 self.variance_epsilon,
             )
             return out, residual
+        else:
+            raise RuntimeError("system not supported")
 
 
 def load_attention(config, prefix, weights):