From f85a308ef1e88fa65b7778c6a60b525774beac28 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Wed, 9 Oct 2024 20:05:39 +0200
Subject: [PATCH] remove debugging lines

---
 server/text_generation_server/models/flash_causal_lm.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 98de8c79..7e256dcf 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -2018,12 +2018,6 @@ class FlashCausalLM(Model):
             top_token_ids,
             top_token_logprobs,
         ) in enumerate(iterator):
-            if all_input_ids[:2] == [1986, 374] and not request_is_prefilling:
-                log_master(
-                    logger.info,
-                    f"{request.id} {next_token_ids} {self.tokenizer.batch_decode(next_token_ids)}",
-                )
-
             # Compute logprobs first as, even though we might skip the token,
             # it can still be required to compute the logprobs
             # modulo on request.id as it is robust to batch.filter whereas the index in the batch is not and we need