Remove useless modifications

Signed-off-by: yuanwu <yuan.wu@intel.com>
2025-09-09 19:34:53 +00:00 · 2025-06-06 07:06:19 +00:00 · 2025-06-06 07:06:19 +00:00 · 1505d4687a
commit 1505d4687a
parent 4a89f59ec7
2 changed files with 1 addition and 1 deletion
--- a/backends/gaudi/server/text_generation_server/layers/attention/hpu.py
+++ b/backends/gaudi/server/text_generation_server/layers/attention/hpu.py
@@ -88,6 +88,7 @@ def attention(
    _, kv_head_num, head_size = key.shape
    query = query.view(bs, -1, head_num, head_size).transpose(1, 2)
    key = key.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
+    value = value.view(bs, -1, kv_head_num, head_size).transpose(1, 2)
    attn_output = fsdpa_op(
        query,
        key,
--- a/backends/gaudi/server/text_generation_server/layers/tensor_parallel.py
+++ b/backends/gaudi/server/text_generation_server/layers/tensor_parallel.py
@@ -155,7 +155,6 @@ class TensorParallelColumnLinear(SuperLayer):

    @classmethod
    def load_multi(cls, config, prefixes: List[str], weights, bias: bool, dim: int):
-        print(f"bias: {bias}")
        if config.quantize == "exl2":
            linears = []
            for prefix in prefixes: