mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Update server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
This commit is contained in:
parent
0f7b9bba33
commit
f746b8e0ae
@ -160,6 +160,8 @@ class LlamaRMSNorm(nn.Module):
|
||||
self.variance_epsilon,
|
||||
)
|
||||
return out, residual
|
||||
else:
|
||||
raise RuntimeError("system not supported")
|
||||
|
||||
|
||||
def load_attention(config, prefix, weights):
|
||||
|
Loading…
Reference in New Issue
Block a user