diff --git a/server/text_generation_server/models/custom_modeling/mpt_modeling.py b/server/text_generation_server/models/custom_modeling/mpt_modeling.py index e60571168..5ccf796df 100644 --- a/server/text_generation_server/models/custom_modeling/mpt_modeling.py +++ b/server/text_generation_server/models/custom_modeling/mpt_modeling.py @@ -297,7 +297,7 @@ def triton_flash_attn_fn( class MultiheadAttention(nn.Module): """Multi-head self attention. - Using torch or triton attention implemetation enables user to also use + Using torch or triton attention implementation enables user to also use additive bias. """ @@ -386,7 +386,7 @@ class MultiheadAttention(nn.Module): class MultiQueryAttention(nn.Module): """Multi-Query self attention. - Using torch or triton attention implemetation enables user to also use + Using torch or triton attention implementation enables user to also use additive bias. """