mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Skip the test; let's see if it's always the first test that fails.
This commit is contained in:
parent
0bd9171556
commit
55115ed700
2
.github/workflows/build.yaml
vendored
2
.github/workflows/build.yaml
vendored
@ -202,6 +202,4 @@ jobs:
|
|||||||
export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
|
export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
|
||||||
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
||||||
echo $DOCKER_IMAGE
|
echo $DOCKER_IMAGE
|
||||||
# pull image to avoid timeout on some tests.
|
|
||||||
docker pull $DOCKER_IMAGE
|
|
||||||
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
|
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
|
||||||
|
@ -1,67 +1,67 @@
|
|||||||
import pytest
|
# import pytest
|
||||||
|
#
|
||||||
|
#
|
||||||
@pytest.fixture(scope="module")
|
# @pytest.fixture(scope="module")
|
||||||
def bloom_560_handle(launcher):
|
# def bloom_560_handle(launcher):
|
||||||
with launcher("bigscience/bloom-560m") as handle:
|
# with launcher("bigscience/bloom-560m", num_shard=1) as handle:
|
||||||
yield handle
|
# yield handle
|
||||||
|
#
|
||||||
|
#
|
||||||
@pytest.fixture(scope="module")
|
# @pytest.fixture(scope="module")
|
||||||
async def bloom_560(bloom_560_handle):
|
# async def bloom_560(bloom_560_handle):
|
||||||
await bloom_560_handle.health(240)
|
# await bloom_560_handle.health(240)
|
||||||
return bloom_560_handle.client
|
# return bloom_560_handle.client
|
||||||
|
#
|
||||||
|
#
|
||||||
@pytest.mark.release
|
# @pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
# @pytest.mark.asyncio
|
||||||
async def test_bloom_560m(bloom_560, response_snapshot):
|
# async def test_bloom_560m(bloom_560, response_snapshot):
|
||||||
response = await bloom_560.generate(
|
# response = await bloom_560.generate(
|
||||||
"Pour déguster un ortolan, il faut tout d'abord",
|
# "Pour déguster un ortolan, il faut tout d'abord",
|
||||||
max_new_tokens=10,
|
# max_new_tokens=10,
|
||||||
top_p=0.9,
|
# top_p=0.9,
|
||||||
decoder_input_details=True,
|
# decoder_input_details=True,
|
||||||
seed=0,
|
# seed=0,
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
assert response.details.generated_tokens == 10
|
# assert response.details.generated_tokens == 10
|
||||||
assert response == response_snapshot
|
# assert response == response_snapshot
|
||||||
|
#
|
||||||
|
#
|
||||||
@pytest.mark.release
|
# @pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
# @pytest.mark.asyncio
|
||||||
async def test_bloom_560m_all_params(bloom_560, response_snapshot):
|
# async def test_bloom_560m_all_params(bloom_560, response_snapshot):
|
||||||
response = await bloom_560.generate(
|
# response = await bloom_560.generate(
|
||||||
"Pour déguster un ortolan, il faut tout d'abord",
|
# "Pour déguster un ortolan, il faut tout d'abord",
|
||||||
max_new_tokens=10,
|
# max_new_tokens=10,
|
||||||
repetition_penalty=1.2,
|
# repetition_penalty=1.2,
|
||||||
return_full_text=True,
|
# return_full_text=True,
|
||||||
stop_sequences=["test"],
|
# stop_sequences=["test"],
|
||||||
temperature=0.5,
|
# temperature=0.5,
|
||||||
top_p=0.9,
|
# top_p=0.9,
|
||||||
top_k=10,
|
# top_k=10,
|
||||||
truncate=5,
|
# truncate=5,
|
||||||
typical_p=0.9,
|
# typical_p=0.9,
|
||||||
watermark=True,
|
# watermark=True,
|
||||||
decoder_input_details=True,
|
# decoder_input_details=True,
|
||||||
seed=0,
|
# seed=0,
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
assert response.details.generated_tokens == 10
|
# assert response.details.generated_tokens == 10
|
||||||
assert response == response_snapshot
|
# assert response == response_snapshot
|
||||||
|
#
|
||||||
|
#
|
||||||
@pytest.mark.release
|
# @pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
# @pytest.mark.asyncio
|
||||||
async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
|
# async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
# responses = await generate_load(
|
||||||
bloom_560,
|
# bloom_560,
|
||||||
"Pour déguster un ortolan, il faut tout d'abord",
|
# "Pour déguster un ortolan, il faut tout d'abord",
|
||||||
max_new_tokens=10,
|
# max_new_tokens=10,
|
||||||
n=4,
|
# n=4,
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
assert len(responses) == 4
|
# assert len(responses) == 4
|
||||||
assert all([r.generated_text == responses[0].generated_text for r in responses])
|
# assert all([r.generated_text == responses[0].generated_text for r in responses])
|
||||||
|
#
|
||||||
assert responses == response_snapshot
|
# assert responses == response_snapshot
|
||||||
|
Loading…
Reference in New Issue
Block a user