mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-08-02 05:10:23 +00:00
feat(trtllm): add new finish reasons
Add new finish reasons introduced in TensorRT-LLM v0.16.0.
This commit is contained in:
parent
58934c8b61
commit
c458d21d07
@ -42,6 +42,10 @@ namespace huggingface::tgi::backends::trtllm {
|
|||||||
return finish_reason_t::kEND_ID;
|
return finish_reason_t::kEND_ID;
|
||||||
case tle::FinishReason::kLENGTH:
|
case tle::FinishReason::kLENGTH:
|
||||||
return finish_reason_t::kLENGTH;
|
return finish_reason_t::kLENGTH;
|
||||||
|
case tle::FinishReason::kTIMED_OUT:
|
||||||
|
return finish_reason_t::kTIMED_OUT;
|
||||||
|
case tle::FinishReason::kCANCELLED:
|
||||||
|
return finish_reason_t::kCANCELLED;
|
||||||
default:
|
default:
|
||||||
std::unreachable();
|
std::unreachable();
|
||||||
}
|
}
|
||||||
|
@ -24,6 +24,14 @@ mod ffi {
|
|||||||
/// The request finished because the maximum number of tokens was reached.
|
/// The request finished because the maximum number of tokens was reached.
|
||||||
#[cxx_name = "kLENGTH"]
|
#[cxx_name = "kLENGTH"]
|
||||||
MaxLength = 3u8,
|
MaxLength = 3u8,
|
||||||
|
|
||||||
|
#[cxx_name = "kTIMED_OUT"]
|
||||||
|
/// The request finished because it timed out (via the mAllotedTime parameter).
|
||||||
|
TimedOut = 4u8,
|
||||||
|
|
||||||
|
#[cxx_name = "kCANCELLED"]
|
||||||
|
/// The request was cancelled by calling cancelRequest.
|
||||||
|
Cancelled = 5u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Struct used as shared type between rust and C++ to represent the result
|
/// Struct used as shared type between rust and C++ to represent the result
|
||||||
|
Loading…
Reference in New Issue
Block a user