Add comment for why slot 0 is OK.

2025-09-12 04:44:52 +00:00 · 2024-09-10 11:46:45 +02:00 · 2024-09-10 11:46:45 +02:00 · ed51bb94ce
commit ed51bb94ce
parent 2881edb3d7
1 changed files with 3 additions and 0 deletions
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -1520,6 +1520,9 @@ class FlashCausalLM(Model):
            )
            # assert block_tables.shape[0] >= slots.shape[0]
            cuda_graph["block_tables"][: block_tables.shape[0]] = block_tables
        # XXX: This works only because block 0 is reserved for the healthcheck,
        # so it doesn't matter if we overwrite it with bogus values.
        cuda_graph["slots"].fill_(0)
        cuda_graph["slots"][: slots.shape[0]] = slots
        cuda_graph["input_lengths"].zero_()