text-generation-inference/integration-tests/models/test_continue_final_message.py
drbh d471805134
Support continue final message (#2733)
* feat: support continue_final_message param in chat request

* feat: add test for continue final message

* fix: bump openapi docs

* fix: remove continue_final_message chat request param

* fix: remove unneeded launcher args in continue test

* fix: bump test output

* fix: remove accidentally included guideline from rebase

* fix: remove guideline tests

* fix: adjust continuation tests expected text

* fix: replace expected output for continue test
2024-11-27 19:13:30 -05:00

77 lines
2.4 KiB
Python

import pytest
import requests
@pytest.fixture(scope="module")
def llama_continue_final_message_handle(launcher):
    """Yield a launched handle for the TinyLlama chat model.

    The ``launcher`` fixture is used as a context manager, so the handle is
    only valid while this module-scoped fixture is active; leaving the
    ``with`` block hands cleanup back to the launcher.
    """
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    with launcher(model_id) as server_handle:
        yield server_handle
@pytest.fixture(scope="module")
async def llama_continue_final_message(llama_continue_final_message_handle):
    """Return the handle's client once the health check has completed.

    The tests in this module read ``base_url`` and ``headers`` from the
    returned client.
    """
    server_handle = llama_continue_final_message_handle
    # NOTE(review): 300 is presumably a timeout in seconds for the server to
    # become healthy -- confirm against the launcher handle implementation.
    await server_handle.health(300)
    return server_handle.client
def test_llama_completion_single_prompt(
    llama_continue_final_message, response_snapshot
):
    """Check a chat completion whose final message comes from the user.

    Posts a system + user conversation to ``/v1/chat/completions`` with
    ``max_tokens=30``, streaming disabled and a fixed seed, then asserts that
    exactly one choice is returned, that its content equals the expected
    text, and that the full JSON payload equals ``response_snapshot``.
    """
    # ``requests`` does not time out unless told to; bound the wait so a
    # stalled server fails this test instead of hanging the whole run.
    request_timeout_s = 60

    http_response = requests.post(
        f"{llama_continue_final_message.base_url}/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {"role": "system", "content": "system message"},
                {"role": "user", "content": "Which is bigger an elephant or a mouse?"},
            ],
            "max_tokens": 30,
            "stream": False,
            "seed": 1337,
        },
        headers=llama_continue_final_message.headers,
        stream=False,
        timeout=request_timeout_s,
    )
    # Surface HTTP errors directly instead of failing later while indexing
    # into an error payload that has no "choices" entry.
    http_response.raise_for_status()

    response = http_response.json()
    # Printed so the raw payload shows up in pytest's captured output when an
    # assertion below fails.
    print(response)

    assert len(response["choices"]) == 1
    content = response["choices"][0]["message"]["content"]
    assert (
        content
        == "Both an elephant and a mouse are mammals. However, the differences between elephants and mice are:\n\n1"
    )
    assert response == response_snapshot
def test_llama_completion_single_prompt_continue(
    llama_continue_final_message, response_snapshot
):
    """Check a chat completion whose final message is a partial assistant turn.

    Posts a system + user conversation followed by an unfinished assistant
    message to ``/v1/chat/completions`` with ``max_tokens=30``, streaming
    disabled and a fixed seed. Asserts that exactly one choice is returned,
    that its content equals the expected text (which reads as a continuation
    of the assistant message rather than a fresh reply), and that the full
    JSON payload equals ``response_snapshot``.
    """
    # ``requests`` does not time out unless told to; bound the wait so a
    # stalled server fails this test instead of hanging the whole run.
    request_timeout_s = 60

    http_response = requests.post(
        f"{llama_continue_final_message.base_url}/v1/chat/completions",
        json={
            "model": "tgi",
            "messages": [
                {"role": "system", "content": "system message"},
                {"role": "user", "content": "Which is bigger an elephant or a mouse?"},
                {
                    "role": "assistant",
                    "content": "the elephant, but have you heard about",
                },
            ],
            "max_tokens": 30,
            "stream": False,
            "seed": 1337,
        },
        headers=llama_continue_final_message.headers,
        stream=False,
        timeout=request_timeout_s,
    )
    # Surface HTTP errors directly instead of failing later while indexing
    # into an error payload that has no "choices" entry.
    http_response.raise_for_status()

    response = http_response.json()
    # Printed so the raw payload shows up in pytest's captured output when an
    # assertion below fails.
    print(response)

    assert len(response["choices"]) == 1
    content = response["choices"][0]["message"]["content"]
    # The leading space is part of the expected text: the content picks up
    # right after "...have you heard about".
    assert (
        content
        == " the royal mouse? It is a little more slender and only weighs around 1.5 pounds for males and 1.3 pounds"
    )
    assert response == response_snapshot