Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 04:44:52 +00:00)
Update Gradio snippet.

commit d59df84169
parent 6e00e05cec
@@ -127,8 +127,18 @@ client = InferenceClient(model="http://127.0.0.1:8080")
 
 def inference(message, history):
     partial_message = ""
-    for token in client.text_generation(message, max_new_tokens=20, stream=True):
-        partial_message += token
+    output = client.chat.completions.create(
+        model="tgi",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": message},
+        ],
+        stream=True,
+        max_tokens=1024,
+    )
+
+    for chunk in output:
+        partial_message += chunk.choices[0].delta.content
         yield partial_message
 
 gr.ChatInterface(
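For context, a minimal sketch of how the updated snippet might read in full. Only the body of inference() comes from this commit; the imports, the client line from the hunk header, and the bare gr.ChatInterface(...) wiring are assumptions about the surrounding docs, which pass additional UI options in the real file.

import gradio as gr
from huggingface_hub import InferenceClient

# TGI exposes an OpenAI-compatible chat completions API on the local endpoint.
client = InferenceClient(model="http://127.0.0.1:8080")

def inference(message, history):
    partial_message = ""
    # Stream chunks from the chat completions endpoint instead of text_generation.
    output = client.chat.completions.create(
        model="tgi",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ],
        stream=True,
        max_tokens=1024,
    )

    for chunk in output:
        # Accumulate the streamed delta and yield the running message to Gradio.
        partial_message += chunk.choices[0].delta.content
        yield partial_message

# Hypothetical minimal wiring; the docs configure the interface further.
gr.ChatInterface(inference).launch()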