mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 23:42:06 +00:00
(misc) move to latest trtllm
This commit is contained in:
parent
507ff66692
commit
213acc6e34
@ -23,7 +23,7 @@ endif ()
|
|||||||
fetchcontent_declare(
|
fetchcontent_declare(
|
||||||
trtllm
|
trtllm
|
||||||
GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
|
GIT_REPOSITORY https://github.com/NVIDIA/TensorRT-LLM.git
|
||||||
GIT_TAG a681853d3803ee5893307e812530b5e7004bb6e1
|
GIT_TAG 32ed92e4491baf2d54682a21d247e1948cca996e
|
||||||
GIT_SHALLOW FALSE
|
GIT_SHALLOW FALSE
|
||||||
)
|
)
|
||||||
fetchcontent_makeavailable(trtllm)
|
fetchcontent_makeavailable(trtllm)
|
||||||
|
@ -8,18 +8,18 @@ use hashbrown::HashMap;
|
|||||||
use log::warn;
|
use log::warn;
|
||||||
use tokenizers::{Encoding, Tokenizer};
|
use tokenizers::{Encoding, Tokenizer};
|
||||||
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
|
use tokio::sync::mpsc::{unbounded_channel, UnboundedReceiver, UnboundedSender};
|
||||||
use tokio::task::{JoinHandle, spawn_blocking};
|
use tokio::task::{spawn_blocking, JoinHandle};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||||
use tracing::{debug, error};
|
use tracing::{debug, error, info};
|
||||||
|
|
||||||
use text_generation_router::{FinishReason, Token};
|
|
||||||
use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
|
|
||||||
use text_generation_router::infer::InferError::{GenerationError, ValidationError};
|
use text_generation_router::infer::InferError::{GenerationError, ValidationError};
|
||||||
use text_generation_router::validation::{Chunk, ValidGenerateRequest};
|
use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse};
|
||||||
use text_generation_router::validation::ValidationError::{
|
use text_generation_router::validation::ValidationError::{
|
||||||
EmptyInput, Grammar, TopNTokensDisabled, UnsupportedModality,
|
EmptyInput, Grammar, TopNTokensDisabled, UnsupportedModality,
|
||||||
};
|
};
|
||||||
|
use text_generation_router::validation::{Chunk, ValidGenerateRequest};
|
||||||
|
use text_generation_router::{FinishReason, Token};
|
||||||
|
|
||||||
use crate::errors::TensorRtLlmBackendError;
|
use crate::errors::TensorRtLlmBackendError;
|
||||||
use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl};
|
use crate::ffi::{create_tensorrt_llm_backend, GenerationStep, TensorRtLlmBackendImpl};
|
||||||
@ -128,6 +128,7 @@ fn executor_status_looper(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// info!("Num response ready: {}", backend.num_responses_ready());
|
||||||
if backend.num_responses_ready() > 0 {
|
if backend.num_responses_ready() > 0 {
|
||||||
match backend.pin_mut().pull_tokens() {
|
match backend.pin_mut().pull_tokens() {
|
||||||
Ok(responses) => {
|
Ok(responses) => {
|
||||||
|
Loading…
Reference in New Issue
Block a user