feat(backend): handle all tokenization failures and send them back to the client

2025-09-17 23:34:52 +00:00 · 2024-11-06 17:46:46 +01:00 · 2024-11-06 17:46:46 +01:00 · 26d0266cec
commit 26d0266cec
parent 20652824d9
1 changed files with 36 additions and 33 deletions
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@ -124,29 +124,31 @@ fn llama_generate_callback(
    let token = match ctx.tokenizer.decode(&[new_token_id], false) {
        Ok(text) => {
            let special = ctx.tokenizer.get_added_vocabulary().is_special_token(&text);
-            Token {
+            Ok(Token {
                id: new_token_id,
                text,
                logprob: new_token_logit,
                special,
            })
        }
-        }
+        Err(ref err) => Err(InferError::GenerationError(err.to_string())),
        Err(_) => panic!("Failed to decode token"),
    };
    // Create the streamed response
-    let response = match is_final {
+    let response = match token {
-        false => InferStreamResponse::Intermediate {
+        Ok(token) => {
            match is_final {
                false => Ok(InferStreamResponse::Intermediate {
                    token,
                    top_tokens: vec![],
-        },
+                }),
                true => {
                    // Decode the whole text
                    match ctx
                        .tokenizer
                        .decode(&ctx.generation.generated_tokens, false)
                    {
-                Ok(text) => InferStreamResponse::End {
+                        Ok(text) => Ok(InferStreamResponse::End {
                            token,
                            top_tokens: vec![],
                            generated_text: GeneratedText {
@ -157,23 +159,24 @@ fn llama_generate_callback(
                            },
                            start: ctx.start,
                            queued: ctx.start,
-                },
+                        }),
-                Err(_) => panic!("Failed to decode token"),
+                        Err(err) => Err(InferError::GenerationError(err.to_string())),
                    }
            // Stream end response
                }
            }
        }
        Err(err) => Err(err),
    };
    // Send back to the client
-    if let Err(ref _err) = ctx.stream.send(Ok(response)) {
+    let should_stop = if let Err(ref _err) = ctx.stream.send(response) {
        error!("Failed to send back the response to the client, cancelling request");
-        // TODO: cancel the request
+        true
-        return true; // should_stop
+    } else {
-    }
+        true
    };
-    // should_stop
+    should_stop
    false
 }
 unsafe fn scheduler_loop(