mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00

commit c3779fa859
parent 26fc232afb

    remove profiling
@@ -39,18 +39,11 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         return generate_pb2.ClearCacheResponse()
 
     async def Prefill(self, request, context):
-        from torch.profiler import profile, ProfilerActivity
-
-        with profile(
-            activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]
-        ) as prefill_prof:
-            batch = self.model.batch_type.from_pb(
-                request.batch, self.model.tokenizer, self.model.device
-            )
+        batch = self.model.batch_type.from_pb(
+            request.batch, self.model.tokenizer, self.model.device
+        )
 
-            generations, next_batch = self.model.generate_token(batch)
-        prefill_prof.export_chrome_trace("prefill.json")
-
+        generations, next_batch = self.model.generate_token(batch)
         self.cache.set(next_batch)
 
         return generate_pb2.PrefillResponse(
@@ -69,20 +62,12 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
             raise ValueError(f"Batch ID {batch_pb.id} not found in cache.")
         batches.append(batch)
 
-        from torch.profiler import profile, ProfilerActivity
-
-        with profile(
-            activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]
-        ) as decode_prof:
-
-            if len(batches) > 1:
-                batch = self.model.batch_type.concatenate(batches)
-            else:
-                batch = batches[0]
+        if len(batches) > 1:
+            batch = self.model.batch_type.concatenate(batches)
+        else:
+            batch = batches[0]
 
-            generations, next_batch = self.model.generate_token(batch)
-        decode_prof.export_chrome_trace("decode.json")
-
+        generations, next_batch = self.model.generate_token(batch)
         self.cache.set(next_batch)
 
         return generate_pb2.DecodeResponse(
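
For reference, a minimal standalone sketch of the torch.profiler pattern this commit strips out of Prefill and Decode. The model and inputs below are hypothetical stand-ins (the real code wrapped the server's batch construction and generate_token calls); the profile(...) / export_chrome_trace(...) usage matches the public torch.profiler API.

# Sketch of the removed profiling pattern. `model` and `inputs` are
# hypothetical stand-ins for the server's real prefill/decode work.
import torch
from torch.profiler import profile, ProfilerActivity

model = torch.nn.Linear(512, 512)
inputs = torch.randn(8, 512)

# Only ask for CUDA activity when a GPU is actually available.
activities = [ProfilerActivity.CPU]
if torch.cuda.is_available():
    activities.append(ProfilerActivity.CUDA)
    model, inputs = model.cuda(), inputs.cuda()

with profile(activities=activities) as prof:
    outputs = model(inputs)

# Writes a Chrome trace viewable in chrome://tracing or Perfetto,
# mirroring the removed export_chrome_trace("prefill.json") /
# export_chrome_trace("decode.json") calls.
prof.export_chrome_trace("prefill.json")

Note that the removed code re-profiled and re-exported the same JSON file on every Prefill/Decode request, which adds per-request overhead; presumably the instrumentation was only ever meant as a temporary measurement aid, hence this cleanup commit.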