mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-27 13:02:12 +00:00
fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices] (#2148)
* fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices]

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* Apply suggestions from code review

---------

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
69514868ee
commit
8721b601e3
@ -17,7 +17,7 @@ use text_generation_router::{
|
|||||||
server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig,
|
server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig,
|
||||||
};
|
};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokenizers::{processors::template::TemplateProcessing, Tokenizer};
|
use tokenizers::{processors::template::TemplateProcessing, Tokenizer, PostProcessor};
|
||||||
use tower_http::cors::AllowOrigin;
|
use tower_http::cors::AllowOrigin;
|
||||||
use tracing_subscriber::layer::SubscriberExt;
|
use tracing_subscriber::layer::SubscriberExt;
|
||||||
use tracing_subscriber::util::SubscriberInitExt;
|
use tracing_subscriber::util::SubscriberInitExt;
|
||||||
@ -309,7 +309,7 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
let mut tokenizer = Tokenizer::from_file(filename).ok();
|
let mut tokenizer = Tokenizer::from_file(filename).ok();
|
||||||
if let Some(tokenizer) = &mut tokenizer {
|
if let Some(tokenizer) = &mut tokenizer {
|
||||||
if let Some(class) = &tokenizer_config.tokenizer_class {
|
if let Some(class) = &tokenizer_config.tokenizer_class {
|
||||||
if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast") && tokenizer.get_post_processor().is_none() {
|
if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast"){
|
||||||
if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) {
|
if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) {
|
||||||
tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205");
|
tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205");
|
||||||
tokenizer.with_post_processor(post_processor);
|
tokenizer.with_post_processor(post_processor);
|
||||||
@ -577,7 +577,7 @@ pub fn create_post_processor(
|
|||||||
|
|
||||||
if add_bos_token {
|
if add_bos_token {
|
||||||
if let Some(bos) = bos_token {
|
if let Some(bos) = bos_token {
|
||||||
single.push(format!("{}:1", bos));
|
pair.push(format!("{}:1", bos));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user