From 56c3eb4adb91276a29625f245a61633ed5537da9 Mon Sep 17 00:00:00 2001
From: yuanwu2017 <yuan.wu@intel.com>
Date: Fri, 8 Nov 2024 01:22:24 +0800
Subject: [PATCH 1/2] Remove the torch package in requirements.txt (#246)

Signed-off-by: yuanwu <yuan.wu@intel.com>
---
 server/requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/server/requirements.txt b/server/requirements.txt
index d941a894..a940574f 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -74,7 +74,6 @@ six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
 sympy==1.12.1 ; python_version >= "3.9" and python_version < "3.13"
 threadpoolctl==3.5.0 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.20.1 ; python_version >= "3.9" and python_version < "3.13"
-torch==2.4.0a0+git74cd574 ; python_version >= "3.9" and python_version < "3.13"
 tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13"
 transformers==4.45.2 ; python_version >= "3.9" and python_version < "3.13"
 transformers[sentencepiece]==4.45.2 ; python_version >= "3.9" and python_version < "3.13"

From d49ce00f405102262773b801dbd8677094f1d68a Mon Sep 17 00:00:00 2001
From: srajabos <shifani.rajabose@intel.com>
Date: Mon, 18 Nov 2024 16:38:30 -0500
Subject: [PATCH 2/2] With this change, bucketing/padding of input is applied
 to health check. (#245)

---
 server/text_generation_server/models/causal_lm.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index c15e6e4e..70b32e4a 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -465,6 +465,8 @@ class CausalLMBatch(Batch):
         requests = [CausalLMRequest.from_pb(idx, req, tokenizer) for idx, req in enumerate(pb.requests)]
 
         max_input_length = max(r.data.truncate for r in requests)
+        if max_input_length < PAD_SEQUENCE_TO_MULTIPLE_OF:
+            max_input_length = PAD_SEQUENCE_TO_MULTIPLE_OF  # floor at one padding multiple so short inputs (e.g. health check) get bucketed/padded too
         max_new_tokens = max(r.stopping_criteria.max_new_tokens for r in requests)
 
         # TODO: Add support for sparse batches