mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-11 12:24:53 +00:00
fix: add chat docs to client
This commit is contained in:
parent c8f2081171
commit 1aa2126206
@@ -79,7 +79,41 @@ class Client:
         top_p: Optional[float] = None,
         tools: Optional[List[Tool]] = None,
     ):
-        """ """
+        """
+        Given a list of messages, generate a response
+
+        Args:
+            messages (`List[Message]`):
+                List of messages
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            logit_bias (`List[float]`):
+                Adjust the likelihood of specified tokens
+            logprobs (`bool`):
+                Include log probabilities in the response
+            top_logprobs (`int`):
+                Include the `n` most likely tokens at each step
+            max_tokens (`int`):
+                Maximum number of generated tokens
+            n (`int`):
+                Generate `n` completions
+            presence_penalty (`float`):
+                The parameter for presence penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            stream (`bool`):
+                Stream the response
+            seed (`int`):
+                Random sampling seed
+            temperature (`float`):
+                The value used to modulate the logits distribution.
+            top_p (`float`):
+                If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
+                higher are kept for generation
+            tools (`List[Tool]`):
+                List of tools to use
+
+        """
         request = ChatRequest(
             model="tgi",
             messages=messages,
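For context, a minimal sketch of how the newly documented synchronous `chat` method might be called. The import paths, `Message` fields, endpoint URL, and response shape below are assumptions inferred from this diff, not taken from the commit itself:

# Hypothetical usage of Client.chat as documented above; the endpoint URL,
# import paths, and response attributes are assumptions, not part of the commit.
from text_generation import Client
from text_generation.types import Message

client = Client("http://127.0.0.1:8080")  # assumed local TGI server
response = client.chat(
    messages=[Message(role="user", content="What is deep learning?")],
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    seed=42,
)
print(response.choices[0].message.content)  # assumed OpenAI-style shape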
@@ -379,8 +413,41 @@ class AsyncClient:
         top_p: Optional[float] = None,
         tools: Optional[List[Tool]] = None,
     ):
-        """ """
-        print("chat")
+        """
+        Given a list of messages, generate a response asynchronously
+
+        Args:
+            messages (`List[Message]`):
+                List of messages
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            logit_bias (`List[float]`):
+                Adjust the likelihood of specified tokens
+            logprobs (`bool`):
+                Include log probabilities in the response
+            top_logprobs (`int`):
+                Include the `n` most likely tokens at each step
+            max_tokens (`int`):
+                Maximum number of generated tokens
+            n (`int`):
+                Generate `n` completions
+            presence_penalty (`float`):
+                The parameter for presence penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            stream (`bool`):
+                Stream the response
+            seed (`int`):
+                Random sampling seed
+            temperature (`float`):
+                The value used to modulate the logits distribution.
+            top_p (`float`):
+                If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
+                higher are kept for generation
+            tools (`List[Tool]`):
+                List of tools to use
+
+        """
         request = ChatRequest(
             model="tgi",
             messages=messages,
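And a corresponding sketch for the async variant, assuming that with `stream=True` the awaited call returns an async iterator of chunks with an OpenAI-style `delta` field; none of these shapes are confirmed by the diff:

import asyncio

from text_generation import AsyncClient
from text_generation.types import Message

async def main():
    client = AsyncClient("http://127.0.0.1:8080")  # assumed local TGI server
    # Assumption: with stream=True the awaited call yields an async iterator.
    stream = await client.chat(
        messages=[Message(role="user", content="Tell me a joke")],
        max_tokens=64,
        stream=True,
    )
    async for chunk in stream:
        # Assumed chunk shape, mirroring OpenAI-style streaming deltas.
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())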