mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 12:54:52 +00:00
feat: add guideline to chat request and template
This commit is contained in:
parent
6e127dcc96
commit
3b25cd3213
@ -48,6 +48,7 @@ impl ChatTemplate {
|
||||
|
||||
pub(crate) fn apply(
|
||||
&self,
|
||||
guideline: Option<&str>,
|
||||
mut messages: Vec<Message>,
|
||||
grammar_with_prompt: Option<(GrammarType, String)>,
|
||||
) -> Result<String, InferError> {
|
||||
@ -65,6 +66,7 @@ impl ChatTemplate {
|
||||
|
||||
self.template
|
||||
.render(ChatTemplateInputs {
|
||||
guideline,
|
||||
messages,
|
||||
bos_token: self.bos_token.as_deref(),
|
||||
eos_token: self.eos_token.as_deref(),
|
||||
|
@ -138,13 +138,14 @@ impl Infer {
|
||||
#[instrument(skip_all)]
|
||||
pub(crate) fn apply_chat_template(
|
||||
&self,
|
||||
guideline: Option<String>,
|
||||
messages: Vec<Message>,
|
||||
grammar_with_prompt: Option<(GrammarType, String)>,
|
||||
) -> Result<String, InferError> {
|
||||
self.chat_template
|
||||
.as_ref()
|
||||
.ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
|
||||
.apply(messages, grammar_with_prompt)
|
||||
.apply(guideline.as_deref(), messages, grammar_with_prompt)
|
||||
.map_err(|e| {
|
||||
metrics::counter!("tgi_request_failure", "err" => "template").increment(1);
|
||||
tracing::error!("{e}");
|
||||
|
@ -829,6 +829,11 @@ pub(crate) struct ChatRequest {
|
||||
#[serde(default)]
|
||||
#[schema(nullable = true, default = "null", example = "null")]
|
||||
pub response_format: Option<GrammarType>,
|
||||
|
||||
/// An optional guideline that is passed to the chat template as its `guideline` input
|
||||
#[serde(default)]
|
||||
#[schema(nullable = true, default = "null", example = "null")]
|
||||
pub guideline: Option<String>,
|
||||
}
|
||||
|
||||
fn default_tool_prompt() -> Option<String> {
|
||||
@ -936,6 +941,7 @@ pub(crate) struct ChatTemplateInputs<'a> {
|
||||
add_generation_prompt: bool,
|
||||
tools: Option<&'a str>,
|
||||
tools_prompt: Option<&'a str>,
|
||||
guideline: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug, PartialEq)]
|
||||
|
@ -141,6 +141,7 @@ async fn get_chat_tokenize(
|
||||
tool_prompt,
|
||||
temperature,
|
||||
response_format,
|
||||
guideline,
|
||||
..
|
||||
} = req;
|
||||
|
||||
@ -151,6 +152,7 @@ async fn get_chat_tokenize(
|
||||
tools,
|
||||
tool_choice,
|
||||
&tool_prompt,
|
||||
guideline,
|
||||
messages,
|
||||
)?;
|
||||
|
||||
@ -1123,6 +1125,7 @@ async fn chat_completions(
|
||||
tool_prompt,
|
||||
temperature,
|
||||
response_format,
|
||||
guideline,
|
||||
..
|
||||
} = req;
|
||||
|
||||
@ -1142,6 +1145,7 @@ async fn chat_completions(
|
||||
tools,
|
||||
tool_choice,
|
||||
&tool_prompt,
|
||||
guideline,
|
||||
messages,
|
||||
)?;
|
||||
|
||||
@ -2402,6 +2406,7 @@ fn prepare_chat_input(
|
||||
tools: Option<Vec<Tool>>,
|
||||
tool_choice: ToolChoice,
|
||||
tool_prompt: &str,
|
||||
guideline: Option<String>,
|
||||
messages: Vec<Message>,
|
||||
) -> Result<PreparedInput, InferError> {
|
||||
if response_format.is_some() && tools.is_some() {
|
||||
@ -2411,7 +2416,7 @@ fn prepare_chat_input(
|
||||
}
|
||||
|
||||
if let Some(format) = response_format {
|
||||
let inputs = infer.apply_chat_template(messages, None)?;
|
||||
let inputs = infer.apply_chat_template(guideline, messages, None)?;
|
||||
return Ok((inputs, Some(format), None));
|
||||
}
|
||||
|
||||
@ -2423,6 +2428,6 @@ fn prepare_chat_input(
|
||||
let tools_grammar_prompt = tool_grammar
|
||||
.as_ref()
|
||||
.map(|t| (GrammarType::Json(serde_json::json!(t)), tool_prompt.into()));
|
||||
let inputs = infer.apply_chat_template(messages, tools_grammar_prompt)?;
|
||||
let inputs = infer.apply_chat_template(guideline, messages, tools_grammar_prompt)?;
|
||||
Ok((inputs, grammar, tool_grammar))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user