mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
fix: remove unused import and refactor test
This commit is contained in:
parent
e2b394e3a0
commit
d375e1e259
@ -67,14 +67,10 @@ async def test_flash_qwen2_vl_simple_streaming(flash_qwen2, response_snapshot):
|
||||
count = 0
|
||||
generated = ""
|
||||
last_response = None
|
||||
try:
|
||||
async for response in responses:
|
||||
count += 1
|
||||
generated += response.choices[0].delta.content
|
||||
last_response = response
|
||||
except Exception as e:
|
||||
# handle the case where the client library raises an exception because it can't parse "[DONE]" as JSON
|
||||
pass
|
||||
|
||||
assert (
|
||||
generated
|
||||
|
@ -34,7 +34,6 @@ from text_generation_server.layers import (
|
||||
TensorParallelColumnLinear,
|
||||
TensorParallelRowLinear,
|
||||
TensorParallelEmbedding,
|
||||
FastLinear,
|
||||
SpeculativeHead,
|
||||
)
|
||||
from text_generation_server.layers.attention import (
|
||||
|
Loading…
Reference in New Issue
Block a user