Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-26 12:32:10 +00:00)
feat(backend): simplify Rust callback
This commit is contained in:
parent
daf1631e09
commit
57b215467b
@ -134,23 +134,18 @@ fn llama_generate_callback(
|
|||||||
// Append the new token to the generated ones
|
// Append the new token to the generated ones
|
||||||
ctx.generation.generated_tokens.push(new_token_id);
|
ctx.generation.generated_tokens.push(new_token_id);
|
||||||
|
|
||||||
// Decode token
|
// Generate response
|
||||||
let token = match ctx.tokenizer.decode(&[new_token_id], false) {
|
let response = match ctx.tokenizer.decode(&[new_token_id], false) {
|
||||||
Ok(text) => {
|
Ok(text) => {
|
||||||
let special = ctx.tokenizer.get_added_vocabulary().is_special_token(&text);
|
let special = ctx.tokenizer.get_added_vocabulary().is_special_token(&text);
|
||||||
Ok(Token {
|
let token = Token {
|
||||||
id: new_token_id,
|
id: new_token_id,
|
||||||
text,
|
text,
|
||||||
logprob: new_token_logit,
|
logprob: new_token_logit,
|
||||||
special,
|
special,
|
||||||
})
|
};
|
||||||
}
|
|
||||||
Err(ref err) => Err(InferError::GenerationError(err.to_string())),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Create the streamed response
|
// Should we generate an ending or intermediate response?
|
||||||
let response = match token {
|
|
||||||
Ok(token) => {
|
|
||||||
match is_final {
|
match is_final {
|
||||||
false => Ok(InferStreamResponse::Intermediate {
|
false => Ok(InferStreamResponse::Intermediate {
|
||||||
token,
|
token,
|
||||||
@ -179,16 +174,14 @@ fn llama_generate_callback(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(err) => Err(err),
|
Err(ref err) => Err(InferError::GenerationError(err.to_string())),
|
||||||
};
|
};
|
||||||
|
|
||||||
// Send back to the client
|
// Send back to the client
|
||||||
if let Err(ref _err) = ctx.stream.send(response) {
|
let status = ctx.stream.send(response).inspect_err(|err| {
|
||||||
error!("Failed to send back the response to the client, cancelling request");
|
error!("Failed to send back the response: {}", err);
|
||||||
true
|
});
|
||||||
} else {
|
status.is_err()
|
||||||
false
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn scheduler_loop(
|
fn scheduler_loop(
|
||||||
|
Loading…
Reference in New Issue
Block a user