From c3779fa8594f5d3aff08d6838edcf25625a2fd8d Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Thu, 6 Apr 2023 17:58:54 +0200 Subject: [PATCH] remove profiling --- server/text_generation_server/server.py | 33 +++++++------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py index 929481cf..3e3789bf 100644 --- a/server/text_generation_server/server.py +++ b/server/text_generation_server/server.py @@ -39,18 +39,11 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer): return generate_pb2.ClearCacheResponse() async def Prefill(self, request, context): - from torch.profiler import profile, ProfilerActivity - - with profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA] - ) as prefill_prof: - batch = self.model.batch_type.from_pb( - request.batch, self.model.tokenizer, self.model.device - ) - - generations, next_batch = self.model.generate_token(batch) - prefill_prof.export_chrome_trace("prefill.json") + batch = self.model.batch_type.from_pb( + request.batch, self.model.tokenizer, self.model.device + ) + generations, next_batch = self.model.generate_token(batch) self.cache.set(next_batch) return generate_pb2.PrefillResponse( @@ -69,20 +62,12 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer): raise ValueError(f"Batch ID {batch_pb.id} not found in cache.") batches.append(batch) - from torch.profiler import profile, ProfilerActivity - - with profile( - activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA] - ) as decode_prof: - - if len(batches) > 1: - batch = self.model.batch_type.concatenate(batches) - else: - batch = batches[0] - - generations, next_batch = self.model.generate_token(batch) - decode_prof.export_chrome_trace("decode.json") + if len(batches) > 1: + batch = self.model.batch_type.concatenate(batches) + else: + batch = batches[0] + generations, next_batch = self.model.generate_token(batch) self.cache.set(next_batch) return generate_pb2.DecodeResponse(