From 57b215467bc28b37a2b7a4ca98ea74b4a171d179 Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz
Date: Wed, 13 Nov 2024 00:22:11 +0100
Subject: [PATCH] feat(backend): simplify Rust callback

---
 backends/llamacpp/src/backend.rs | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs
index 8e36aa63..2dd5b70d 100644
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@@ -134,23 +134,18 @@ fn llama_generate_callback(
     // Append the new token to the generated ones
     ctx.generation.generated_tokens.push(new_token_id);
 
-    // Decode token
-    let token = match ctx.tokenizer.decode(&[new_token_id], false) {
+    // Generate response
+    let response = match ctx.tokenizer.decode(&[new_token_id], false) {
         Ok(text) => {
             let special = ctx.tokenizer.get_added_vocabulary().is_special_token(&text);
-            Ok(Token {
+            let token = Token {
                 id: new_token_id,
                 text,
                 logprob: new_token_logit,
                 special,
-            })
-        }
-        Err(ref err) => Err(InferError::GenerationError(err.to_string())),
-    };
+            };
 
-    // Create the streamed response
-    let response = match token {
-        Ok(token) => {
+            // Should we generate an ending or intermediate response?
             match is_final {
                 false => Ok(InferStreamResponse::Intermediate {
                     token,
@@ -179,16 +174,14 @@ fn llama_generate_callback(
                 }
             }
         }
-        Err(err) => Err(err),
+        Err(ref err) => Err(InferError::GenerationError(err.to_string())),
     };
 
     // Send back to the client
-    if let Err(ref _err) = ctx.stream.send(response) {
-        error!("Failed to send back the response to the client, cancelling request");
-        true
-    } else {
-        false
-    }
+    let status = ctx.stream.send(response).inspect_err(|err| {
+        error!("Failed to send back the response: {}", err);
+    });
+    status.is_err()
 }
 
 fn scheduler_loop(
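
Note on the send path: the patch replaces the manual `if let Err(..) { true } else { false }` branching with `Result::inspect_err`, which logs the failure as a side effect and returns the `Result` unchanged, so the boolean stop signal is just `status.is_err()`. `inspect_err` is a standard-library method on `Result` (stable since roughly Rust 1.76). The following is a minimal, standalone sketch of that pattern, not the TGI code itself; `Response`, `SendError`, `fake_send`, and `send_and_report` are hypothetical stand-ins for the real stream and response types.

// Standalone sketch, assuming hypothetical types; the real callback sends an
// InferStreamResponse over ctx.stream instead of calling fake_send.

#[derive(Debug)]
struct Response(&'static str);

#[derive(Debug)]
struct SendError(&'static str);

// Hypothetical stand-in for `ctx.stream.send(response)`.
fn fake_send(response: Response, fail: bool) -> Result<(), SendError> {
    if fail {
        Err(SendError("client disconnected"))
    } else {
        println!("delivered: {:?}", response);
        Ok(())
    }
}

// Mirrors the shape of the updated callback tail: log on error, then map the
// Result to the bool the caller expects (true = stop generating).
fn send_and_report(response: Response, fail: bool) -> bool {
    let status = fake_send(response, fail).inspect_err(|err| {
        eprintln!("Failed to send back the response: {:?}", err);
    });
    status.is_err()
}

fn main() {
    assert!(!send_and_report(Response("intermediate token"), false));
    assert!(send_and_report(Response("final token"), true));
}

Because `inspect_err` only borrows the error, the logging closure cannot accidentally consume or alter the `Result`, which keeps the cancellation decision in a single, easy-to-read expression.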