From d375e1e259f08d8c96126c7728ab7f7d5b73ff10 Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 31 Oct 2024 19:25:35 -0400 Subject: [PATCH] fix: remove unused import and refactor test --- integration-tests/models/test_flash_qwen2_vl.py | 12 ++++-------- .../models/custom_modeling/qwen2_vl.py | 1 - 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/integration-tests/models/test_flash_qwen2_vl.py b/integration-tests/models/test_flash_qwen2_vl.py index dfbb5907..946ab2f1 100644 --- a/integration-tests/models/test_flash_qwen2_vl.py +++ b/integration-tests/models/test_flash_qwen2_vl.py @@ -67,14 +67,10 @@ async def test_flash_qwen2_vl_simple_streaming(flash_qwen2, response_snapshot): count = 0 generated = "" last_response = None - try: - async for response in responses: - count += 1 - generated += response.choices[0].delta.content - last_response = response - except Exception as e: - # handle when the client library raises an exception when it cant parse "[DONE]" as JSON - pass + async for response in responses: + count += 1 + generated += response.choices[0].delta.content + last_response = response assert ( generated diff --git a/server/text_generation_server/models/custom_modeling/qwen2_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_vl.py index bd20eea5..5936c6fe 100644 --- a/server/text_generation_server/models/custom_modeling/qwen2_vl.py +++ b/server/text_generation_server/models/custom_modeling/qwen2_vl.py @@ -34,7 +34,6 @@ from text_generation_server.layers import ( TensorParallelColumnLinear, TensorParallelRowLinear, TensorParallelEmbedding, - FastLinear, SpeculativeHead, ) from text_generation_server.layers.attention import (