From 25c48f5679c1f2a148adc57422f7163ab9ce46e5 Mon Sep 17 00:00:00 2001
From: marcusdunn <marcus.s.dunn@gmail.com>
Date: Tue, 15 Aug 2023 15:12:32 -0700
Subject: [PATCH] added a tokenizer to `HeterogeneousNextTokenChooser`

I'm not entirely happy with this solution, but it seemed the most consistent with how extra parameters are currently passed to `from_pb` in `concatenate`.
---
 server/text_generation_server/models/flash_causal_lm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 860c9bc5..00fc574e 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -634,8 +634,7 @@ class FlashCausalLMBatch(Batch):
             next_token_chooser_parameters,
             dtype=batches[0].next_token_chooser.dtype,
             device=batches[0].next_token_chooser.device,
-            # todo - determine how to obtain access to a tokenizer here
-            tokenizer=...
+            tokenizer=batches[0].next_token_chooser.tokenizer
         )
 
         # Needed to avoid dropping blocks when the batches will go out of scope