From bcd9d3a5cbc6627430d49d94c5b22345925c408e Mon Sep 17 00:00:00 2001
From: Cyril Vallez
Date: Thu, 23 Jan 2025 12:49:30 +0000
Subject: [PATCH] cohere fix

---
 .../models/transformers_flash_causal_lm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/server/text_generation_server/models/transformers_flash_causal_lm.py b/server/text_generation_server/models/transformers_flash_causal_lm.py
index d424de35..36de89b4 100644
--- a/server/text_generation_server/models/transformers_flash_causal_lm.py
+++ b/server/text_generation_server/models/transformers_flash_causal_lm.py
@@ -263,5 +263,8 @@ class TransformersFlashCausalLM(FlashCausalLM):
 
         # For Granite while next transformers version is released and we can use `lm_head_indices` natively
         if hasattr(self.model.config, "logits_scaling"):
            logits = logits / self.model.config.logits_scaling
+        # For Cohere for similar reasons
+        elif hasattr(self.model, "logit_scale"):
+            logits = logits * self.model.logit_scale
         return logits, None
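
Below is a minimal, self-contained sketch (not part of the patch) of the branching the hunk adds: Granite-style models carry a `logits_scaling` divisor on the config, while Cohere-style models carry a `logit_scale` multiplier directly on the model. The stub classes and numeric values are hypothetical; only the attribute names and the if/elif structure mirror the hunk above.

# Hypothetical stand-ins for the two model families; values chosen for illustration only.
class _GraniteLikeConfig:
    logits_scaling = 16.0  # Granite-style divisor lives on the config


class _GraniteLikeModel:
    config = _GraniteLikeConfig()


class _CohereLikeModel:
    class config:  # no `logits_scaling` here, so the first branch is skipped
        pass

    logit_scale = 0.0625  # Cohere-style multiplier lives on the model itself


def scale_logits(model, logits):
    # Same branching as the patched forward(): Granite divides by a config value,
    # Cohere multiplies by an attribute stored directly on the model.
    if hasattr(model.config, "logits_scaling"):
        return [x / model.config.logits_scaling for x in logits]
    elif hasattr(model, "logit_scale"):
        return [x * model.logit_scale for x in logits]
    return logits


print(scale_logits(_GraniteLikeModel(), [32.0, 64.0]))  # -> [2.0, 4.0]
print(scale_logits(_CohereLikeModel(), [32.0, 64.0]))   # -> [2.0, 4.0]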