diff --git a/README.md b/README.md
index bf9a9f20..fc5be00d 100644
--- a/README.md
+++ b/README.md
@@ -22,22 +22,12 @@ limitations under the License.
 - [Table of contents](#table-of-contents)
 - [Tested Models and Configurations](#tested-models-and-configurations)
 - [Running TGI on Gaudi](#running-tgi-on-gaudi)
-  - [TGI-Gaudi Benchmark](#tgi-gaudi-benchmark)
-    - [Static Batching Benchmark](#static-batching-benchmark)
-    - [Continuous Batching Benchmark](#continuous-batching-benchmark)
-  - [Tested Models and Configurations](#tested-models-and-configurations)
 - [Running TGI with BF16 Precision](#running-tgi-with-bf16-precision)
-  - [Llama2-7B on 1 Card](#llama2-7b-on-1-card)
-  - [Llama2-70B on 8 cards](#llama2-70b-on-8-cards)
-  - [Llama3.1-8B on 1 card](#llama31-8b-on-1-card)
-  - [Llama3.1-70B 8 cards](#llama31-70b-8-cards)
-  - [Llava-v1.6-Mistral-7B on 1 card](#llava-v16-mistral-7b-on-1-card)
 - [Running TGI with FP8 Precision](#running-tgi-with-fp8-precision)
 - [TGI-Gaudi Benchmark](#tgi-gaudi-benchmark)
 - [Adjusting TGI Parameters](#adjusting-tgi-parameters)
 - [Environment Variables](#environment-variables)
 - [Profiler](#profiler)
-- [License](#license)
 
 ## Tested Models and Configurations
 
diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py
index 11a7d7a6..bc458a91 100644
--- a/server/text_generation_server/server.py
+++ b/server/text_generation_server/server.py
@@ -104,14 +104,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         max_supported_total_tokens = self.model.warmup(request)
         return generate_pb2.WarmupResponse(max_supported_total_tokens=max_supported_total_tokens)
 
-        # else:
-        #     batch = self.model.batch_type.from_pb(
-        #         request.batch, self.model.tokenizer, self.model.dtype, self.model.device
-        #     )
-
-        #     max_supported_total_tokens = self.model.warmup(batch)
-        #     return generate_pb2.WarmupResponse(max_supported_total_tokens=max_supported_total_tokens)
-
 
     async def Prefill(self, request, context):
         start = time.time_ns()