mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
fix: remove unused import and refactor test
This commit is contained in:
parent
e2b394e3a0
commit
d375e1e259
@ -67,14 +67,10 @@ async def test_flash_qwen2_vl_simple_streaming(flash_qwen2, response_snapshot):
|
|||||||
count = 0
|
count = 0
|
||||||
generated = ""
|
generated = ""
|
||||||
last_response = None
|
last_response = None
|
||||||
try:
|
async for response in responses:
|
||||||
async for response in responses:
|
count += 1
|
||||||
count += 1
|
generated += response.choices[0].delta.content
|
||||||
generated += response.choices[0].delta.content
|
last_response = response
|
||||||
last_response = response
|
|
||||||
except Exception as e:
|
|
||||||
# handle when the client library raises an exception when it cant parse "[DONE]" as JSON
|
|
||||||
pass
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
generated
|
generated
|
||||||
|
@ -34,7 +34,6 @@ from text_generation_server.layers import (
|
|||||||
TensorParallelColumnLinear,
|
TensorParallelColumnLinear,
|
||||||
TensorParallelRowLinear,
|
TensorParallelRowLinear,
|
||||||
TensorParallelEmbedding,
|
TensorParallelEmbedding,
|
||||||
FastLinear,
|
|
||||||
SpeculativeHead,
|
SpeculativeHead,
|
||||||
)
|
)
|
||||||
from text_generation_server.layers.attention import (
|
from text_generation_server.layers.attention import (
|
||||||
|
Loading…
Reference in New Issue
Block a user