From f85a308ef1e88fa65b7778c6a60b525774beac28 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Wed, 9 Oct 2024 20:05:39 +0200
Subject: [PATCH] remove debugging lines

---
 server/text_generation_server/models/flash_causal_lm.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 98de8c79..7e256dcf 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -2018,12 +2018,6 @@ class FlashCausalLM(Model):
             top_token_ids,
             top_token_logprobs,
         ) in enumerate(iterator):
-            if all_input_ids[:2] == [1986, 374] and not request_is_prefilling:
-                log_master(
-                    logger.info,
-                    f"{request.id} {next_token_ids} {self.tokenizer.batch_decode(next_token_ids)}",
-                )
-
             # Compute logprobs first as, even though we might skip the token,
             # it can still be required to compute the logprobs
             # modulo on request.id as it is robust to batch.filter whereas the index in the batch is not and we need