fix: remove unused import and refactor test

2025-09-11 20:34:54 +00:00 · 2024-10-31 19:25:35 -04:00 · 2024-10-31 19:25:35 -04:00 · d375e1e259
commit d375e1e259
parent e2b394e3a0
2 changed files with 4 additions and 9 deletions
--- a/integration-tests/models/test_flash_qwen2_vl.py
+++ b/integration-tests/models/test_flash_qwen2_vl.py
@@ -67,14 +67,10 @@ async def test_flash_qwen2_vl_simple_streaming(flash_qwen2, response_snapshot):
    count = 0
    generated = ""
    last_response = None
-    try:
+    async for response in responses:
-        async for response in responses:
+        count += 1
-            count += 1
+        generated += response.choices[0].delta.content
-            generated += response.choices[0].delta.content
+        last_response = response
-            last_response = response
-    except Exception as e:
-        # handle when the client library raises an exception when it cant parse "[DONE]" as JSON
-        pass
    assert (
        generated
--- a/server/text_generation_server/models/custom_modeling/qwen2_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_vl.py
@@ -34,7 +34,6 @@ from text_generation_server.layers import (
    TensorParallelColumnLinear,
    TensorParallelRowLinear,
    TensorParallelEmbedding,
    FastLinear,
    SpeculativeHead,
 )
 from text_generation_server.layers.attention import (