Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 23:12:07 +00:00
(misc) move to latest trtllm

commit 213acc6e34 (parent 507ff66692)
@@ -23,7 +23,7 @@ endif ()
 fetchcontent_declare(
         trtllm
         GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
-        GIT_TAG a681853d3803ee5893307e812530b5e7004bb6e1
+        GIT_TAG 32ed92e4491baf2d54682a21d247e1948cca996e
         GIT_SHALLOW FALSE
 )
 fetchcontent_makeavailable(trtllm)

@@ -8,18 +8,18 @@ use hashbrown::HashMap;
 use log::warn;
 use tokenizers::{Encoding, Tokenizer};
 use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
-use tokio::task::{JoinHandle, spawn_blocking};
+use tokio::task::{spawn_blocking, JoinHandle};
 use tokio::time::Instant;
 use tokio_stream::wrappers::UnboundedReceiverStream;
-use tracing::{debug, error};
+use tracing::{debug, error, info};

-use text_generation_router::{FinishReason, Token};
-use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
 use text_generation_router::infer::InferError::{GenerationError, ValidationError};
-use text_generation_router::validation::{Chunk, ValidGenerateRequest};
+use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
 use text_generation_router::validation::ValidationError::{
     EmptyInput, Grammar, TopNTokensDisabled, UnsupportedModality,
 };
+use text_generation_router::validation::{Chunk, ValidGenerateRequest};
+use text_generation_router::{FinishReason, Token};

 use crate::errors::TensorRtLlmBackendError;
 use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl};
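Aside from pulling in info from tracing, this hunk only reorders the import list; the imports themselves show the moving parts of the looper: an unbounded tokio mpsc channel carrying results out of a blocking task started with spawn_blocking. A minimal sketch of that channel-plus-blocking-task pairing (the names below are illustrative stand-ins, not the backend's actual types) could look like:

use tokio::sync::mpsc::{unbounded_channel, UnboundedSender};
use tokio::task::spawn_blocking;

// Hypothetical stand-in for one step pulled from the executor.
struct Step(u32);

// Blocking poll loop; the real looper would call into the FFI executor here.
fn poll_executor(sender: UnboundedSender<Step>) {
    for i in 0..3 {
        if sender.send(Step(i)).is_err() {
            break; // receiver dropped, stop polling
        }
    }
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = unbounded_channel();
    // Run the blocking loop on tokio's dedicated blocking thread pool.
    let handle = spawn_blocking(move || poll_executor(tx));
    while let Some(Step(i)) = rx.recv().await {
        println!("received step {i}");
    }
    handle.await.expect("poll task panicked");
}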
@@ -128,6 +128,7 @@ fn executor_status_looper(
             }
         }

+        // info!("Num response ready: {}", backend.num_responses_ready());
         if backend.num_responses_ready() > 0 {
             match backend.pin_mut().pull_tokens() {
                 Ok(responses) => {
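The single added line in this hunk is a commented-out info! trace placed just before the readiness check. The surrounding guard-then-pull shape — only calling pull_tokens once num_responses_ready reports pending work — can be sketched against a hypothetical Executor trait standing in for the FFI backend seen in the diff above:

// Hypothetical stand-in for the FFI executor; the diff above calls
// num_responses_ready() and pull_tokens() on the real backend.
trait Executor {
    fn num_responses_ready(&self) -> usize;
    fn pull_tokens(&mut self) -> Result<Vec<u32>, String>;
}

// One iteration of the polling loop: skip the pull entirely when the
// executor reports nothing ready, so the loop never blocks on an empty queue.
fn poll_once<E: Executor>(backend: &mut E) {
    if backend.num_responses_ready() > 0 {
        match backend.pull_tokens() {
            Ok(responses) => {
                for token in responses {
                    // Forward each decoded token to the streaming side here.
                    let _ = token;
                }
            }
            Err(err) => eprintln!("pull_tokens failed: {err}"),
        }
    }
}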