mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
Update server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
This commit is contained in:
parent
0f7b9bba33
commit
f746b8e0ae
@@ -160,6 +160,8 @@ class LlamaRMSNorm(nn.Module):
|
|||||||
self.variance_epsilon,
|
self.variance_epsilon,
|
||||||
)
|
)
|
||||||
return out, residual
|
return out, residual
|
||||||
|
else:
|
||||||
|
raise RuntimeError("system not supported")
|
||||||
|
|
||||||
|
|
||||||
def load_attention(config, prefix, weights):
|
def load_attention(config, prefix, weights):
|
||||||
|
Loading…
Reference in New Issue
Block a user