From 25c48f5679c1f2a148adc57422f7163ab9ce46e5 Mon Sep 17 00:00:00 2001
From: marcusdunn
Date: Tue, 15 Aug 2023 15:12:32 -0700
Subject: [PATCH] added a tokenizer to `HeterogeneousNextTokenChooser`

I'm not super happy with this solution - but it seemed most in line with the current way of passing in extra params into `from_pb` in `concatenate`.
---
 server/text_generation_server/models/flash_causal_lm.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 860c9bc5..00fc574e 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -634,8 +634,7 @@ class FlashCausalLMBatch(Batch):
             next_token_chooser_parameters,
             dtype=batches[0].next_token_chooser.dtype,
             device=batches[0].next_token_chooser.device,
-            # todo - determine how to obtain access to a tokenizer here
-            tokenizer=...
+            tokenizer=batches[0].next_token_chooser.tokenizer
         )
 
         # Needed to avoid dropping blocks when the batches will go out of scope