From d59df84169b1cbfb5a36eb21bc6b244ae426c193 Mon Sep 17 00:00:00 2001
From: Vaibhav Srivastav
Date: Tue, 13 Aug 2024 19:05:47 +0200
Subject: [PATCH] Update Gradio snippet.

---
 docs/source/basic_tutorials/consuming_tgi.md | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/docs/source/basic_tutorials/consuming_tgi.md b/docs/source/basic_tutorials/consuming_tgi.md
index c235b020..edb7ec13 100644
--- a/docs/source/basic_tutorials/consuming_tgi.md
+++ b/docs/source/basic_tutorials/consuming_tgi.md
@@ -127,8 +127,18 @@ client = InferenceClient(model="http://127.0.0.1:8080")
 
 def inference(message, history):
     partial_message = ""
-    for token in client.text_generation(message, max_new_tokens=20, stream=True):
-        partial_message += token
+    output = client.chat.completions.create(
+        model="tgi",
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": message},
+        ],
+        stream=True,
+        max_tokens=1024,
+    )
+
+    for chunk in output:
+        partial_message += chunk.choices[0].delta.content
     yield partial_message
 
 gr.ChatInterface(
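
For reference, a minimal sketch of how the updated snippet reads once this patch is applied, assuming `gradio` and `huggingface_hub` are installed and a TGI server is listening on http://127.0.0.1:8080. The `None` guard on the final streamed chunk and the bare `gr.ChatInterface(inference).launch()` call are assumptions added here, since the patch cuts off at `gr.ChatInterface(`.

    import gradio as gr
    from huggingface_hub import InferenceClient

    # Client pointed at the locally running TGI instance from the docs example.
    client = InferenceClient(model="http://127.0.0.1:8080")

    def inference(message, history):
        partial_message = ""
        # TGI exposes an OpenAI-compatible chat completions route; "tgi" is the
        # placeholder model name accepted by the server.
        output = client.chat.completions.create(
            model="tgi",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": message},
            ],
            stream=True,
            max_tokens=1024,
        )

        # Stream tokens back to Gradio by yielding the growing partial message.
        for chunk in output:
            delta = chunk.choices[0].delta.content
            if delta is not None:  # guard added here: the last chunk may carry no content
                partial_message += delta
            yield partial_message

    # Assumed minimal launch; the real docs snippet continues past this call.
    gr.ChatInterface(inference).launch()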