Update tensor_parallel.py

Fix abnormal conversation output from the Baichuan model by applying `F.normalize` to the head weight when `config.model_type == "baichuan"`.
2025-09-11 20:34:54 +00:00 · 2024-12-03 19:00:28 +08:00 · 2024-12-03 19:00:28 +08:00 · 731f890887
commit 731f890887
parent 2c74c55637
1 changed file with 5 additions and 0 deletions
--- a/server/text_generation_server/layers/tensor_parallel.py
+++ b/server/text_generation_server/layers/tensor_parallel.py
@@ -66,6 +66,11 @@ class TensorParallelHead(SuperLayer):
            weight = weights.get_tensor(f"{prefix}.weight")
            should_gather = False
        if config.model_type == "baichuan":
            # Resolve the issue of abnormal conversation performance in the Baichuan large model.
            # https://github.com/huggingface/text-generation-inference/issues/2780
            weight = F.normalize(weight)
        return TensorParallelHead(
            get_linear(weight, bias=None),
            process_group=weights.process_group,