mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 23:42:06 +00:00
feat: use model name as adapter id in chat endpoints (#2128)
This commit is contained in:
parent
58effe78b5
commit
87ebb6477b
@ -384,7 +384,7 @@ pub struct CompletionRequest {
|
|||||||
/// UNUSED
|
/// Forwarded as the adapter id unless it is omitted or equal to "tgi".
|
||||||
#[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
|
#[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
|
||||||
/// ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
|
/// ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
|
||||||
pub model: String,
|
pub model: Option<String>,
|
||||||
|
|
||||||
/// The prompt to generate completions for.
|
/// The prompt to generate completions for.
|
||||||
#[schema(example = "What is Deep Learning?")]
|
#[schema(example = "What is Deep Learning?")]
|
||||||
@ -731,7 +731,7 @@ impl ChatCompletionChunk {
|
|||||||
pub(crate) struct ChatRequest {
|
pub(crate) struct ChatRequest {
|
||||||
#[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
|
#[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
|
||||||
/// [UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
|
/// ID of the model to use; forwarded as the adapter id unless it is omitted or equal to "tgi". See the model endpoint compatibility table for details on which models work with the Chat API.
|
||||||
pub model: String,
|
pub model: Option<String>,
|
||||||
|
|
||||||
/// A list of messages comprising the conversation so far.
|
/// A list of messages comprising the conversation so far.
|
||||||
#[schema(example = "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]")]
|
#[schema(example = "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]")]
|
||||||
|
@ -597,6 +597,7 @@ async fn completions(
|
|||||||
metrics::counter!("tgi_request_count").increment(1);
|
metrics::counter!("tgi_request_count").increment(1);
|
||||||
|
|
||||||
let CompletionRequest {
|
let CompletionRequest {
|
||||||
|
model,
|
||||||
max_tokens,
|
max_tokens,
|
||||||
seed,
|
seed,
|
||||||
stop,
|
stop,
|
||||||
@ -665,7 +666,7 @@ async fn completions(
|
|||||||
seed,
|
seed,
|
||||||
top_n_tokens: None,
|
top_n_tokens: None,
|
||||||
grammar: None,
|
grammar: None,
|
||||||
..Default::default()
|
adapter_id: model.as_ref().filter(|m| *m != "tgi").map(String::from),
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
@ -1001,6 +1002,7 @@ async fn chat_completions(
|
|||||||
let span = tracing::Span::current();
|
let span = tracing::Span::current();
|
||||||
metrics::counter!("tgi_request_count").increment(1);
|
metrics::counter!("tgi_request_count").increment(1);
|
||||||
let ChatRequest {
|
let ChatRequest {
|
||||||
|
model,
|
||||||
logprobs,
|
logprobs,
|
||||||
max_tokens,
|
max_tokens,
|
||||||
messages,
|
messages,
|
||||||
@ -1106,7 +1108,7 @@ async fn chat_completions(
|
|||||||
seed,
|
seed,
|
||||||
top_n_tokens: req.top_logprobs,
|
top_n_tokens: req.top_logprobs,
|
||||||
grammar,
|
grammar,
|
||||||
..Default::default()
|
adapter_id: model.filter(|m| *m != "tgi").map(String::from),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user