add lm_head

2025-09-10 11:54:52 +00:00 · 2023-06-01 11:46:51 +02:00 · 2023-06-01 11:46:51 +02:00 · 246e8f8250
commit 246e8f8250
parent f652788d54
1 changed files with 15 additions and 1 deletions
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -165,9 +165,21 @@ class FlashSantacoder(FlashCausalLM):
                    del value
        if model.lm_head.weight.device == torch.device("meta"):
            model.lm_head.weight = torch.nn.Parameter(model.transformer.wte.weight)
        torch.cuda.empty_cache()
        model.post_load_weights(quantize)
        uninitialized_parameters = []
        for n, p in model.named_parameters():
            if p.data.device == torch.device("meta"):
                uninitialized_parameters.append(n)
        if uninitialized_parameters:
            raise RuntimeError(
                f"found uninitialized parameters in model : {uninitialized_parameters}"
            )
    def decode(self, generated_ids: List[int]) -> str:
        # Do not skip special tokens as they are used for custom parsing rules of the generated text
        return self.tokenizer.decode(
@@ -387,6 +399,8 @@ class FlashSantacoderSharded(FlashSantacoder):
                    else:
                        module._buffers[param_name] = tensor
        if model.lm_head.weight.device == torch.device("meta"):
            model.lm_head.weight = torch.nn.Parameter(model.transformer.wte.weight)
        torch.cuda.empty_cache()
        model.post_load_weights(quantize)