mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
Remove generated files.
This commit is contained in:
parent
79469f5f39
commit
a31db04709
2
.gitignore
vendored
2
.gitignore
vendored
@ -5,6 +5,8 @@ router/tokenizer.json
|
|||||||
|
|
||||||
backends/v2/src/client/pb
|
backends/v2/src/client/pb
|
||||||
backends/v3/src/client/pb
|
backends/v3/src/client/pb
|
||||||
|
backends/client/src/v2/pb
|
||||||
|
backends/client/src/v3/pb
|
||||||
|
|
||||||
# ROCm auto-generated files
|
# ROCm auto-generated files
|
||||||
*.hip
|
*.hip
|
||||||
|
@ -1,613 +0,0 @@
|
|||||||
// This file is @generated by prost-build.
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct HealthRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct HealthResponse {}
|
|
||||||
/// / Empty request
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct InfoRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct InfoResponse {
|
|
||||||
#[prost(bool, tag = "1")]
|
|
||||||
pub requires_padding: bool,
|
|
||||||
#[prost(string, tag = "2")]
|
|
||||||
pub dtype: ::prost::alloc::string::String,
|
|
||||||
#[prost(string, tag = "3")]
|
|
||||||
pub device_type: ::prost::alloc::string::String,
|
|
||||||
#[prost(uint32, optional, tag = "4")]
|
|
||||||
pub window_size: ::core::option::Option<u32>,
|
|
||||||
#[prost(uint32, tag = "5")]
|
|
||||||
pub speculate: u32,
|
|
||||||
}
|
|
||||||
/// / Empty request
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ServiceDiscoveryRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ServiceDiscoveryResponse {
|
|
||||||
/// / Other shards urls
|
|
||||||
#[prost(string, repeated, tag = "1")]
|
|
||||||
pub urls: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ClearCacheRequest {
|
|
||||||
/// / Optional batch id
|
|
||||||
#[prost(uint64, optional, tag = "1")]
|
|
||||||
pub id: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
/// / Empty response
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ClearCacheResponse {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct NextTokenChooserParameters {
|
|
||||||
/// / exponential scaling output probability distribution
|
|
||||||
#[prost(float, tag = "1")]
|
|
||||||
pub temperature: f32,
|
|
||||||
/// / restricting to the k highest probability elements
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub top_k: u32,
|
|
||||||
/// / restricting to top tokens summing to prob_cut_off <= prob_cut_off
|
|
||||||
#[prost(float, tag = "3")]
|
|
||||||
pub top_p: f32,
|
|
||||||
/// / restricting to top tokens summing to prob_cut_off <= prob_cut_off
|
|
||||||
#[prost(float, tag = "4")]
|
|
||||||
pub typical_p: f32,
|
|
||||||
/// / apply sampling on the logits
|
|
||||||
#[prost(bool, tag = "5")]
|
|
||||||
pub do_sample: bool,
|
|
||||||
/// / random seed for sampling
|
|
||||||
#[prost(uint64, tag = "6")]
|
|
||||||
pub seed: u64,
|
|
||||||
/// / repetition penalty
|
|
||||||
#[prost(float, tag = "7")]
|
|
||||||
pub repetition_penalty: f32,
|
|
||||||
/// / frequency penalty
|
|
||||||
#[prost(float, tag = "9")]
|
|
||||||
pub frequency_penalty: f32,
|
|
||||||
/// / token watermarking using "A Watermark for Large Language Models"
|
|
||||||
#[prost(bool, tag = "8")]
|
|
||||||
pub watermark: bool,
|
|
||||||
/// / grammar (applied if not empty)
|
|
||||||
#[prost(string, tag = "10")]
|
|
||||||
pub grammar: ::prost::alloc::string::String,
|
|
||||||
/// / grammar type
|
|
||||||
#[prost(enumeration = "GrammarType", tag = "11")]
|
|
||||||
pub grammar_type: i32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct StoppingCriteriaParameters {
|
|
||||||
/// / Maximum number of generated tokens
|
|
||||||
#[prost(uint32, tag = "1")]
|
|
||||||
pub max_new_tokens: u32,
|
|
||||||
/// / Optional stopping sequences
|
|
||||||
#[prost(string, repeated, tag = "2")]
|
|
||||||
pub stop_sequences: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
/// / Ignore end of sequence token
|
|
||||||
/// / used for benchmarking
|
|
||||||
#[prost(bool, tag = "3")]
|
|
||||||
pub ignore_eos_token: bool,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Request {
|
|
||||||
/// / Request ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / The generation context
|
|
||||||
#[prost(string, tag = "2")]
|
|
||||||
pub inputs: ::prost::alloc::string::String,
|
|
||||||
/// / Context truncation
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub truncate: u32,
|
|
||||||
/// / Next Token Chooser Parameters
|
|
||||||
#[prost(message, optional, tag = "4")]
|
|
||||||
pub parameters: ::core::option::Option<NextTokenChooserParameters>,
|
|
||||||
/// / Stopping Criteria Parameters
|
|
||||||
#[prost(message, optional, tag = "5")]
|
|
||||||
pub stopping_parameters: ::core::option::Option<StoppingCriteriaParameters>,
|
|
||||||
/// / Return prefill logprobs
|
|
||||||
#[prost(bool, tag = "6")]
|
|
||||||
pub prefill_logprobs: bool,
|
|
||||||
/// / Return most likely n tokens
|
|
||||||
#[prost(uint32, tag = "7")]
|
|
||||||
pub top_n_tokens: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Batch {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / Individual requests
|
|
||||||
#[prost(message, repeated, tag = "2")]
|
|
||||||
pub requests: ::prost::alloc::vec::Vec<Request>,
|
|
||||||
/// / Batch size (==len(requests))
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub size: u32,
|
|
||||||
/// / Maximum number of tokens this batch will grow to
|
|
||||||
#[prost(uint32, tag = "4")]
|
|
||||||
pub max_tokens: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct CachedBatch {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / Individual requests ids
|
|
||||||
#[prost(uint64, repeated, tag = "2")]
|
|
||||||
pub request_ids: ::prost::alloc::vec::Vec<u64>,
|
|
||||||
/// / Batch size (==len(requests))
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub size: u32,
|
|
||||||
/// / Maximum number of tokens this batch will grow to
|
|
||||||
#[prost(uint32, tag = "4")]
|
|
||||||
pub max_tokens: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct GeneratedText {
|
|
||||||
/// / Output
|
|
||||||
#[prost(string, tag = "1")]
|
|
||||||
pub text: ::prost::alloc::string::String,
|
|
||||||
/// / Number of generated tokens
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub generated_tokens: u32,
|
|
||||||
/// / Finish reason
|
|
||||||
#[prost(enumeration = "FinishReason", tag = "3")]
|
|
||||||
pub finish_reason: i32,
|
|
||||||
/// / Seed
|
|
||||||
#[prost(uint64, optional, tag = "4")]
|
|
||||||
pub seed: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Tokens {
|
|
||||||
/// / Token IDs
|
|
||||||
#[prost(uint32, repeated, tag = "1")]
|
|
||||||
pub ids: ::prost::alloc::vec::Vec<u32>,
|
|
||||||
/// / Logprobs
|
|
||||||
#[prost(float, repeated, tag = "2")]
|
|
||||||
pub logprobs: ::prost::alloc::vec::Vec<f32>,
|
|
||||||
/// / tokens
|
|
||||||
#[prost(string, repeated, tag = "3")]
|
|
||||||
pub texts: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
/// / special
|
|
||||||
#[prost(bool, repeated, tag = "4")]
|
|
||||||
pub is_special: ::prost::alloc::vec::Vec<bool>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Generation {
|
|
||||||
/// / Request ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub request_id: u64,
|
|
||||||
/// / Prefill tokens (optional)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub prefill_tokens: ::core::option::Option<Tokens>,
|
|
||||||
#[prost(message, optional, tag = "3")]
|
|
||||||
pub tokens: ::core::option::Option<Tokens>,
|
|
||||||
/// / Complete generated text
|
|
||||||
#[prost(message, optional, tag = "4")]
|
|
||||||
pub generated_text: ::core::option::Option<GeneratedText>,
|
|
||||||
/// / Top tokens
|
|
||||||
#[prost(message, repeated, tag = "5")]
|
|
||||||
pub top_tokens: ::prost::alloc::vec::Vec<Tokens>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct FilterBatchRequest {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub batch_id: u64,
|
|
||||||
/// / Requests to keep
|
|
||||||
#[prost(uint64, repeated, tag = "2")]
|
|
||||||
pub request_ids: ::prost::alloc::vec::Vec<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct FilterBatchResponse {
|
|
||||||
/// / Filtered Batch (cached)
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct PrefillRequest {
|
|
||||||
/// / Batch
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<Batch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct PrefillResponse {
|
|
||||||
/// / Generation
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub generations: ::prost::alloc::vec::Vec<Generation>,
|
|
||||||
/// / Next batch (cached)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
/// / Forward elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "3")]
|
|
||||||
pub forward_ns: u64,
|
|
||||||
/// / Decode elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "4")]
|
|
||||||
pub decode_ns: u64,
|
|
||||||
/// / Total elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "5")]
|
|
||||||
pub total_ns: u64,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct DecodeRequest {
|
|
||||||
/// / Cached batches
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub batches: ::prost::alloc::vec::Vec<CachedBatch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct DecodeResponse {
|
|
||||||
/// / Decodes
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub generations: ::prost::alloc::vec::Vec<Generation>,
|
|
||||||
/// / Next batch (cached)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
/// / Forward elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "3")]
|
|
||||||
pub forward_ns: u64,
|
|
||||||
/// / Decode elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "4")]
|
|
||||||
pub decode_ns: u64,
|
|
||||||
/// / Total elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "5")]
|
|
||||||
pub total_ns: u64,
|
|
||||||
/// / Concatenate elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, optional, tag = "6")]
|
|
||||||
pub concat_ns: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct WarmupRequest {
|
|
||||||
/// / Batch to warmup on
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<Batch>,
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub max_input_length: u32,
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub max_prefill_tokens: u32,
|
|
||||||
#[prost(uint32, tag = "4")]
|
|
||||||
pub max_total_tokens: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct WarmupResponse {
|
|
||||||
/// / Maximum number of tokens supported by the model
|
|
||||||
#[prost(uint32, optional, tag = "1")]
|
|
||||||
pub max_supported_total_tokens: ::core::option::Option<u32>,
|
|
||||||
}
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
|
|
||||||
#[repr(i32)]
|
|
||||||
pub enum GrammarType {
|
|
||||||
None = 0,
|
|
||||||
Json = 1,
|
|
||||||
Regex = 2,
|
|
||||||
}
|
|
||||||
impl GrammarType {
|
|
||||||
/// String value of the enum field names used in the ProtoBuf definition.
|
|
||||||
///
|
|
||||||
/// The values are not transformed in any way and thus are considered stable
|
|
||||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
|
||||||
pub fn as_str_name(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
GrammarType::None => "GRAMMAR_TYPE_NONE",
|
|
||||||
GrammarType::Json => "GRAMMAR_TYPE_JSON",
|
|
||||||
GrammarType::Regex => "GRAMMAR_TYPE_REGEX",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
|
||||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
|
||||||
match value {
|
|
||||||
"GRAMMAR_TYPE_NONE" => Some(Self::None),
|
|
||||||
"GRAMMAR_TYPE_JSON" => Some(Self::Json),
|
|
||||||
"GRAMMAR_TYPE_REGEX" => Some(Self::Regex),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
|
|
||||||
#[repr(i32)]
|
|
||||||
pub enum FinishReason {
|
|
||||||
Length = 0,
|
|
||||||
EosToken = 1,
|
|
||||||
StopSequence = 2,
|
|
||||||
}
|
|
||||||
impl FinishReason {
|
|
||||||
/// String value of the enum field names used in the ProtoBuf definition.
|
|
||||||
///
|
|
||||||
/// The values are not transformed in any way and thus are considered stable
|
|
||||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
|
||||||
pub fn as_str_name(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
FinishReason::Length => "FINISH_REASON_LENGTH",
|
|
||||||
FinishReason::EosToken => "FINISH_REASON_EOS_TOKEN",
|
|
||||||
FinishReason::StopSequence => "FINISH_REASON_STOP_SEQUENCE",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
|
||||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
|
||||||
match value {
|
|
||||||
"FINISH_REASON_LENGTH" => Some(Self::Length),
|
|
||||||
"FINISH_REASON_EOS_TOKEN" => Some(Self::EosToken),
|
|
||||||
"FINISH_REASON_STOP_SEQUENCE" => Some(Self::StopSequence),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Generated client implementations.
|
|
||||||
pub mod text_generation_service_client {
|
|
||||||
#![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
|
|
||||||
use tonic::codegen::http::Uri;
|
|
||||||
use tonic::codegen::*;
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct TextGenerationServiceClient<T> {
|
|
||||||
inner: tonic::client::Grpc<T>,
|
|
||||||
}
|
|
||||||
impl TextGenerationServiceClient<tonic::transport::Channel> {
|
|
||||||
/// Attempt to create a new client by connecting to a given endpoint.
|
|
||||||
pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
|
|
||||||
where
|
|
||||||
D: TryInto<tonic::transport::Endpoint>,
|
|
||||||
D::Error: Into<StdError>,
|
|
||||||
{
|
|
||||||
let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
|
|
||||||
Ok(Self::new(conn))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<T> TextGenerationServiceClient<T>
|
|
||||||
where
|
|
||||||
T: tonic::client::GrpcService<tonic::body::BoxBody>,
|
|
||||||
T::Error: Into<StdError>,
|
|
||||||
T::ResponseBody: Body<Data = Bytes> + Send + 'static,
|
|
||||||
<T::ResponseBody as Body>::Error: Into<StdError> + Send,
|
|
||||||
{
|
|
||||||
pub fn new(inner: T) -> Self {
|
|
||||||
let inner = tonic::client::Grpc::new(inner);
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
pub fn with_origin(inner: T, origin: Uri) -> Self {
|
|
||||||
let inner = tonic::client::Grpc::with_origin(inner, origin);
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
pub fn with_interceptor<F>(
|
|
||||||
inner: T,
|
|
||||||
interceptor: F,
|
|
||||||
) -> TextGenerationServiceClient<InterceptedService<T, F>>
|
|
||||||
where
|
|
||||||
F: tonic::service::Interceptor,
|
|
||||||
T::ResponseBody: Default,
|
|
||||||
T: tonic::codegen::Service<
|
|
||||||
http::Request<tonic::body::BoxBody>,
|
|
||||||
Response = http::Response<
|
|
||||||
<T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
|
|
||||||
>,
|
|
||||||
>,
|
|
||||||
<T as tonic::codegen::Service<http::Request<tonic::body::BoxBody>>>::Error:
|
|
||||||
Into<StdError> + Send + Sync,
|
|
||||||
{
|
|
||||||
TextGenerationServiceClient::new(InterceptedService::new(inner, interceptor))
|
|
||||||
}
|
|
||||||
/// Compress requests with the given encoding.
|
|
||||||
///
|
|
||||||
/// This requires the server to support it otherwise it might respond with an
|
|
||||||
/// error.
|
|
||||||
#[must_use]
|
|
||||||
pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
|
||||||
self.inner = self.inner.send_compressed(encoding);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Enable decompressing responses.
|
|
||||||
#[must_use]
|
|
||||||
pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
|
||||||
self.inner = self.inner.accept_compressed(encoding);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Limits the maximum size of a decoded message.
|
|
||||||
///
|
|
||||||
/// Default: `4MB`
|
|
||||||
#[must_use]
|
|
||||||
pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
|
|
||||||
self.inner = self.inner.max_decoding_message_size(limit);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Limits the maximum size of an encoded message.
|
|
||||||
///
|
|
||||||
/// Default: `usize::MAX`
|
|
||||||
#[must_use]
|
|
||||||
pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
|
|
||||||
self.inner = self.inner.max_encoding_message_size(limit);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// / Model Info
|
|
||||||
pub async fn info(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::InfoRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::InfoResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v2.TextGenerationService/Info");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut()
|
|
||||||
.insert(GrpcMethod::new("generate.v2.TextGenerationService", "Info"));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Service discovery
|
|
||||||
pub async fn service_discovery(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::ServiceDiscoveryRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::ServiceDiscoveryResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v2.TextGenerationService/ServiceDiscovery",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"ServiceDiscovery",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Empties batch cache
|
|
||||||
pub async fn clear_cache(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::ClearCacheRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::ClearCacheResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v2.TextGenerationService/ClearCache",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"ClearCache",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Remove requests from a cached batch
|
|
||||||
pub async fn filter_batch(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::FilterBatchRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::FilterBatchResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v2.TextGenerationService/FilterBatch",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"FilterBatch",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Warmup the model and compute max cache size
|
|
||||||
pub async fn warmup(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::WarmupRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::WarmupResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v2.TextGenerationService/Warmup");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"Warmup",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Prefill batch and decode first token
|
|
||||||
pub async fn prefill(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::PrefillRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::PrefillResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v2.TextGenerationService/Prefill");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"Prefill",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Decode token for a list of prefilled batches
|
|
||||||
pub async fn decode(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::DecodeRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::DecodeResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v2.TextGenerationService/Decode");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"Decode",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Health check
|
|
||||||
pub async fn health(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::HealthRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::HealthResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v2.TextGenerationService/Health");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v2.TextGenerationService",
|
|
||||||
"Health",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
// This file is @generated by prost-build.
|
|
||||||
pub mod generate {
|
|
||||||
pub mod v2 {
|
|
||||||
include!("generate.v2.rs");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,699 +0,0 @@
|
|||||||
// This file is @generated by prost-build.
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct HealthRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct HealthResponse {}
|
|
||||||
/// / Empty request
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct InfoRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct InfoResponse {
|
|
||||||
#[prost(bool, tag = "1")]
|
|
||||||
pub requires_padding: bool,
|
|
||||||
#[prost(string, tag = "2")]
|
|
||||||
pub dtype: ::prost::alloc::string::String,
|
|
||||||
#[prost(string, tag = "3")]
|
|
||||||
pub device_type: ::prost::alloc::string::String,
|
|
||||||
#[prost(uint32, optional, tag = "4")]
|
|
||||||
pub window_size: ::core::option::Option<u32>,
|
|
||||||
#[prost(uint32, tag = "5")]
|
|
||||||
pub speculate: u32,
|
|
||||||
#[prost(bool, tag = "6")]
|
|
||||||
pub support_chunking: bool,
|
|
||||||
#[prost(bool, tag = "7")]
|
|
||||||
pub use_prefix_caching: bool,
|
|
||||||
#[prost(string, tag = "8")]
|
|
||||||
pub attention_impl: ::prost::alloc::string::String,
|
|
||||||
#[prost(uint32, tag = "9")]
|
|
||||||
pub block_size: u32,
|
|
||||||
}
|
|
||||||
/// / Empty request
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ServiceDiscoveryRequest {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ServiceDiscoveryResponse {
|
|
||||||
/// / Other shards urls
|
|
||||||
#[prost(string, repeated, tag = "1")]
|
|
||||||
pub urls: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ClearCacheRequest {
|
|
||||||
/// / Optional batch id
|
|
||||||
#[prost(uint64, optional, tag = "1")]
|
|
||||||
pub id: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
/// / Empty response
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct ClearCacheResponse {}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Image {
|
|
||||||
/// / Binary image data.
|
|
||||||
#[prost(bytes = "vec", tag = "1")]
|
|
||||||
pub data: ::prost::alloc::vec::Vec<u8>,
|
|
||||||
/// / Image MIME type.
|
|
||||||
#[prost(string, tag = "2")]
|
|
||||||
pub mimetype: ::prost::alloc::string::String,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct InputChunk {
|
|
||||||
#[prost(oneof = "input_chunk::Chunk", tags = "1, 2")]
|
|
||||||
pub chunk: ::core::option::Option<input_chunk::Chunk>,
|
|
||||||
}
|
|
||||||
/// Nested message and enum types in `InputChunk`.
|
|
||||||
pub mod input_chunk {
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Oneof)]
|
|
||||||
pub enum Chunk {
|
|
||||||
/// / Plain text data
|
|
||||||
#[prost(string, tag = "1")]
|
|
||||||
Text(::prost::alloc::string::String),
|
|
||||||
/// / Image data
|
|
||||||
#[prost(message, tag = "2")]
|
|
||||||
Image(super::Image),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Input {
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub chunks: ::prost::alloc::vec::Vec<InputChunk>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct NextTokenChooserParameters {
|
|
||||||
/// / exponential scaling output probability distribution
|
|
||||||
#[prost(float, tag = "1")]
|
|
||||||
pub temperature: f32,
|
|
||||||
/// / restricting to the k highest probability elements
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub top_k: u32,
|
|
||||||
/// / restricting to top tokens summing to prob_cut_off <= prob_cut_off
|
|
||||||
#[prost(float, tag = "3")]
|
|
||||||
pub top_p: f32,
|
|
||||||
/// / restricting to top tokens summing to prob_cut_off <= prob_cut_off
|
|
||||||
#[prost(float, tag = "4")]
|
|
||||||
pub typical_p: f32,
|
|
||||||
/// / apply sampling on the logits
|
|
||||||
#[prost(bool, tag = "5")]
|
|
||||||
pub do_sample: bool,
|
|
||||||
/// / random seed for sampling
|
|
||||||
#[prost(uint64, tag = "6")]
|
|
||||||
pub seed: u64,
|
|
||||||
/// / repetition penalty
|
|
||||||
#[prost(float, tag = "7")]
|
|
||||||
pub repetition_penalty: f32,
|
|
||||||
/// / frequency penalty
|
|
||||||
#[prost(float, tag = "9")]
|
|
||||||
pub frequency_penalty: f32,
|
|
||||||
/// / token watermarking using "A Watermark for Large Language Models"
|
|
||||||
#[prost(bool, tag = "8")]
|
|
||||||
pub watermark: bool,
|
|
||||||
/// / grammar (applied if not empty)
|
|
||||||
#[prost(string, tag = "10")]
|
|
||||||
pub grammar: ::prost::alloc::string::String,
|
|
||||||
/// / grammar type
|
|
||||||
#[prost(enumeration = "GrammarType", tag = "11")]
|
|
||||||
pub grammar_type: i32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct StoppingCriteriaParameters {
|
|
||||||
/// / Maximum number of generated tokens
|
|
||||||
#[prost(uint32, tag = "1")]
|
|
||||||
pub max_new_tokens: u32,
|
|
||||||
/// / Optional stopping sequences
|
|
||||||
#[prost(string, repeated, tag = "2")]
|
|
||||||
pub stop_sequences: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
/// / Ignore end of sequence token
|
|
||||||
/// / used for benchmarking
|
|
||||||
#[prost(bool, tag = "3")]
|
|
||||||
pub ignore_eos_token: bool,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Request {
|
|
||||||
/// / Request ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / The generation context as chunks
|
|
||||||
#[prost(message, optional, tag = "8")]
|
|
||||||
pub input_chunks: ::core::option::Option<Input>,
|
|
||||||
/// / The generation context, stringified input_chunks
|
|
||||||
#[prost(string, tag = "2")]
|
|
||||||
pub inputs: ::prost::alloc::string::String,
|
|
||||||
/// / Context truncation
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub truncate: u32,
|
|
||||||
/// / Next Token Chooser Parameters
|
|
||||||
#[prost(message, optional, tag = "4")]
|
|
||||||
pub parameters: ::core::option::Option<NextTokenChooserParameters>,
|
|
||||||
/// / Stopping Criteria Parameters
|
|
||||||
#[prost(message, optional, tag = "5")]
|
|
||||||
pub stopping_parameters: ::core::option::Option<StoppingCriteriaParameters>,
|
|
||||||
/// / Return prefill logprobs
|
|
||||||
#[prost(bool, tag = "6")]
|
|
||||||
pub prefill_logprobs: bool,
|
|
||||||
/// / Return most likely n tokens
|
|
||||||
#[prost(uint32, tag = "7")]
|
|
||||||
pub top_n_tokens: u32,
|
|
||||||
/// / Paged attention blocks
|
|
||||||
#[prost(uint32, repeated, tag = "9")]
|
|
||||||
pub blocks: ::prost::alloc::vec::Vec<u32>,
|
|
||||||
/// / Paged attention slots
|
|
||||||
#[prost(uint32, repeated, tag = "10")]
|
|
||||||
pub slots: ::prost::alloc::vec::Vec<u32>,
|
|
||||||
/// / LORA adapter index
|
|
||||||
#[prost(string, optional, tag = "11")]
|
|
||||||
pub adapter_id: ::core::option::Option<::prost::alloc::string::String>,
|
|
||||||
/// / Tokens that can be retrieved from the KV cache.
|
|
||||||
/// / This value is set for the first prefill and never reset
|
|
||||||
#[prost(uint32, tag = "12")]
|
|
||||||
pub cache_len: u32,
|
|
||||||
/// / Context truncation
|
|
||||||
#[prost(bool, tag = "13")]
|
|
||||||
pub add_special_tokens: bool,
|
|
||||||
/// / Chunk of tokens that must be computed for the first prefill
|
|
||||||
/// / This value is set for the first prefill and never reset
|
|
||||||
#[prost(uint32, optional, tag = "14")]
|
|
||||||
pub chunk_len: ::core::option::Option<u32>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Batch {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / Individual requests
|
|
||||||
#[prost(message, repeated, tag = "2")]
|
|
||||||
pub requests: ::prost::alloc::vec::Vec<Request>,
|
|
||||||
/// / Batch size (==len(requests))
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub size: u32,
|
|
||||||
/// / Maximum number of tokens this batch will grow to
|
|
||||||
#[prost(uint32, tag = "4")]
|
|
||||||
pub max_tokens: u32,
|
|
||||||
/// / Maximum number of Paged Attention blocks
|
|
||||||
#[prost(uint32, tag = "5")]
|
|
||||||
pub max_blocks: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct CachedBatch {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub id: u64,
|
|
||||||
/// / Individual requests ids
|
|
||||||
#[prost(uint64, repeated, tag = "2")]
|
|
||||||
pub request_ids: ::prost::alloc::vec::Vec<u64>,
|
|
||||||
/// / Batch size (==len(requests))
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub size: u32,
|
|
||||||
/// / Maximum number of tokens this batch will grow to
|
|
||||||
#[prost(uint32, tag = "4")]
|
|
||||||
pub max_tokens: u32,
|
|
||||||
/// / Number of tokens in the next forward
|
|
||||||
#[prost(uint32, tag = "5")]
|
|
||||||
pub current_tokens: u32,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct GeneratedText {
|
|
||||||
/// / Output
|
|
||||||
#[prost(string, tag = "1")]
|
|
||||||
pub text: ::prost::alloc::string::String,
|
|
||||||
/// / Number of generated tokens
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub generated_tokens: u32,
|
|
||||||
/// / Finish reason
|
|
||||||
#[prost(enumeration = "FinishReason", tag = "3")]
|
|
||||||
pub finish_reason: i32,
|
|
||||||
/// / Seed
|
|
||||||
#[prost(uint64, optional, tag = "4")]
|
|
||||||
pub seed: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Tokens {
|
|
||||||
/// / Token IDs
|
|
||||||
#[prost(uint32, repeated, tag = "1")]
|
|
||||||
pub ids: ::prost::alloc::vec::Vec<u32>,
|
|
||||||
/// / Logprobs
|
|
||||||
#[prost(float, repeated, tag = "2")]
|
|
||||||
pub logprobs: ::prost::alloc::vec::Vec<f32>,
|
|
||||||
/// / tokens
|
|
||||||
#[prost(string, repeated, tag = "3")]
|
|
||||||
pub texts: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
|
|
||||||
/// / special
|
|
||||||
#[prost(bool, repeated, tag = "4")]
|
|
||||||
pub is_special: ::prost::alloc::vec::Vec<bool>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct Generation {
|
|
||||||
/// / Request ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub request_id: u64,
|
|
||||||
/// / Prefill tokens (optional)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub prefill_tokens: ::core::option::Option<Tokens>,
|
|
||||||
#[prost(message, optional, tag = "3")]
|
|
||||||
pub tokens: ::core::option::Option<Tokens>,
|
|
||||||
/// / Complete generated text
|
|
||||||
#[prost(message, optional, tag = "4")]
|
|
||||||
pub generated_text: ::core::option::Option<GeneratedText>,
|
|
||||||
/// / Top tokens
|
|
||||||
#[prost(message, repeated, tag = "5")]
|
|
||||||
pub top_tokens: ::prost::alloc::vec::Vec<Tokens>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct FilterBatchRequest {
|
|
||||||
/// / Batch ID
|
|
||||||
#[prost(uint64, tag = "1")]
|
|
||||||
pub batch_id: u64,
|
|
||||||
/// / Requests to keep
|
|
||||||
#[prost(uint64, repeated, tag = "2")]
|
|
||||||
pub request_ids: ::prost::alloc::vec::Vec<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct FilterBatchResponse {
|
|
||||||
/// / Filtered Batch (cached)
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct PrefillRequest {
|
|
||||||
/// / Batch
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<Batch>,
|
|
||||||
/// / Optional cached batch
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub cached_batch: ::core::option::Option<CachedBatch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct PrefillResponse {
|
|
||||||
/// / Generation
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub generations: ::prost::alloc::vec::Vec<Generation>,
|
|
||||||
/// / Next batch (cached)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
/// / Forward elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "3")]
|
|
||||||
pub forward_ns: u64,
|
|
||||||
/// / Decode elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "4")]
|
|
||||||
pub decode_ns: u64,
|
|
||||||
/// / Total elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "5")]
|
|
||||||
pub total_ns: u64,
|
|
||||||
/// / Concatenate elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, optional, tag = "6")]
|
|
||||||
pub concat_ns: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct DecodeRequest {
|
|
||||||
/// / Cached batches
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub batches: ::prost::alloc::vec::Vec<CachedBatch>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct DecodeResponse {
|
|
||||||
/// / Decodes
|
|
||||||
#[prost(message, repeated, tag = "1")]
|
|
||||||
pub generations: ::prost::alloc::vec::Vec<Generation>,
|
|
||||||
/// / Next batch (cached)
|
|
||||||
#[prost(message, optional, tag = "2")]
|
|
||||||
pub batch: ::core::option::Option<CachedBatch>,
|
|
||||||
/// / Forward elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "3")]
|
|
||||||
pub forward_ns: u64,
|
|
||||||
/// / Decode elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "4")]
|
|
||||||
pub decode_ns: u64,
|
|
||||||
/// / Total elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, tag = "5")]
|
|
||||||
pub total_ns: u64,
|
|
||||||
/// / Concatenate elapsed time in nanoseconds
|
|
||||||
#[prost(uint64, optional, tag = "6")]
|
|
||||||
pub concat_ns: ::core::option::Option<u64>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct WarmupRequest {
|
|
||||||
/// / Batch to warmup on
|
|
||||||
#[prost(message, optional, tag = "1")]
|
|
||||||
pub batch: ::core::option::Option<Batch>,
|
|
||||||
#[prost(uint32, optional, tag = "2")]
|
|
||||||
pub max_input_tokens: ::core::option::Option<u32>,
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub max_prefill_tokens: u32,
|
|
||||||
#[prost(uint32, optional, tag = "4")]
|
|
||||||
pub max_total_tokens: ::core::option::Option<u32>,
|
|
||||||
}
|
|
||||||
#[allow(clippy::derive_partial_eq_without_eq)]
|
|
||||||
#[derive(Clone, PartialEq, ::prost::Message)]
|
|
||||||
pub struct WarmupResponse {
|
|
||||||
/// / Maximum number of tokens supported by the model
|
|
||||||
#[prost(uint32, optional, tag = "1")]
|
|
||||||
pub max_supported_total_tokens: ::core::option::Option<u32>,
|
|
||||||
/// / Maximum input tokens by clients should be equal to request value if it's set
|
|
||||||
/// / Otherwise warmup automatically allocates a value here
|
|
||||||
#[prost(uint32, tag = "2")]
|
|
||||||
pub max_input_tokens: u32,
|
|
||||||
/// / Maximum total tokens by clients should be equal to request value if it's set
|
|
||||||
/// / Otherwise warmup automatically allocates a value here
|
|
||||||
#[prost(uint32, tag = "3")]
|
|
||||||
pub max_total_tokens: u32,
|
|
||||||
}
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
|
|
||||||
#[repr(i32)]
|
|
||||||
pub enum GrammarType {
|
|
||||||
None = 0,
|
|
||||||
Json = 1,
|
|
||||||
Regex = 2,
|
|
||||||
}
|
|
||||||
impl GrammarType {
|
|
||||||
/// String value of the enum field names used in the ProtoBuf definition.
|
|
||||||
///
|
|
||||||
/// The values are not transformed in any way and thus are considered stable
|
|
||||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
|
||||||
pub fn as_str_name(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
GrammarType::None => "GRAMMAR_TYPE_NONE",
|
|
||||||
GrammarType::Json => "GRAMMAR_TYPE_JSON",
|
|
||||||
GrammarType::Regex => "GRAMMAR_TYPE_REGEX",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
|
||||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
|
||||||
match value {
|
|
||||||
"GRAMMAR_TYPE_NONE" => Some(Self::None),
|
|
||||||
"GRAMMAR_TYPE_JSON" => Some(Self::Json),
|
|
||||||
"GRAMMAR_TYPE_REGEX" => Some(Self::Regex),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
|
|
||||||
#[repr(i32)]
|
|
||||||
pub enum FinishReason {
|
|
||||||
Length = 0,
|
|
||||||
EosToken = 1,
|
|
||||||
StopSequence = 2,
|
|
||||||
}
|
|
||||||
impl FinishReason {
|
|
||||||
/// String value of the enum field names used in the ProtoBuf definition.
|
|
||||||
///
|
|
||||||
/// The values are not transformed in any way and thus are considered stable
|
|
||||||
/// (if the ProtoBuf definition does not change) and safe for programmatic use.
|
|
||||||
pub fn as_str_name(&self) -> &'static str {
|
|
||||||
match self {
|
|
||||||
FinishReason::Length => "FINISH_REASON_LENGTH",
|
|
||||||
FinishReason::EosToken => "FINISH_REASON_EOS_TOKEN",
|
|
||||||
FinishReason::StopSequence => "FINISH_REASON_STOP_SEQUENCE",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Creates an enum from field names used in the ProtoBuf definition.
|
|
||||||
pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
|
|
||||||
match value {
|
|
||||||
"FINISH_REASON_LENGTH" => Some(Self::Length),
|
|
||||||
"FINISH_REASON_EOS_TOKEN" => Some(Self::EosToken),
|
|
||||||
"FINISH_REASON_STOP_SEQUENCE" => Some(Self::StopSequence),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/// Generated client implementations.
|
|
||||||
pub mod text_generation_service_client {
|
|
||||||
#![allow(unused_variables, dead_code, missing_docs, clippy::let_unit_value)]
|
|
||||||
use tonic::codegen::http::Uri;
|
|
||||||
use tonic::codegen::*;
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct TextGenerationServiceClient<T> {
|
|
||||||
inner: tonic::client::Grpc<T>,
|
|
||||||
}
|
|
||||||
impl TextGenerationServiceClient<tonic::transport::Channel> {
|
|
||||||
/// Attempt to create a new client by connecting to a given endpoint.
|
|
||||||
pub async fn connect<D>(dst: D) -> Result<Self, tonic::transport::Error>
|
|
||||||
where
|
|
||||||
D: TryInto<tonic::transport::Endpoint>,
|
|
||||||
D::Error: Into<StdError>,
|
|
||||||
{
|
|
||||||
let conn = tonic::transport::Endpoint::new(dst)?.connect().await?;
|
|
||||||
Ok(Self::new(conn))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<T> TextGenerationServiceClient<T>
|
|
||||||
where
|
|
||||||
T: tonic::client::GrpcService<tonic::body::BoxBody>,
|
|
||||||
T::Error: Into<StdError>,
|
|
||||||
T::ResponseBody: Body<Data = Bytes> + Send + 'static,
|
|
||||||
<T::ResponseBody as Body>::Error: Into<StdError> + Send,
|
|
||||||
{
|
|
||||||
pub fn new(inner: T) -> Self {
|
|
||||||
let inner = tonic::client::Grpc::new(inner);
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
pub fn with_origin(inner: T, origin: Uri) -> Self {
|
|
||||||
let inner = tonic::client::Grpc::with_origin(inner, origin);
|
|
||||||
Self { inner }
|
|
||||||
}
|
|
||||||
pub fn with_interceptor<F>(
|
|
||||||
inner: T,
|
|
||||||
interceptor: F,
|
|
||||||
) -> TextGenerationServiceClient<InterceptedService<T, F>>
|
|
||||||
where
|
|
||||||
F: tonic::service::Interceptor,
|
|
||||||
T::ResponseBody: Default,
|
|
||||||
T: tonic::codegen::Service<
|
|
||||||
http::Request<tonic::body::BoxBody>,
|
|
||||||
Response = http::Response<
|
|
||||||
<T as tonic::client::GrpcService<tonic::body::BoxBody>>::ResponseBody,
|
|
||||||
>,
|
|
||||||
>,
|
|
||||||
<T as tonic::codegen::Service<http::Request<tonic::body::BoxBody>>>::Error:
|
|
||||||
Into<StdError> + Send + Sync,
|
|
||||||
{
|
|
||||||
TextGenerationServiceClient::new(InterceptedService::new(inner, interceptor))
|
|
||||||
}
|
|
||||||
/// Compress requests with the given encoding.
|
|
||||||
///
|
|
||||||
/// This requires the server to support it otherwise it might respond with an
|
|
||||||
/// error.
|
|
||||||
#[must_use]
|
|
||||||
pub fn send_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
|
||||||
self.inner = self.inner.send_compressed(encoding);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Enable decompressing responses.
|
|
||||||
#[must_use]
|
|
||||||
pub fn accept_compressed(mut self, encoding: CompressionEncoding) -> Self {
|
|
||||||
self.inner = self.inner.accept_compressed(encoding);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Limits the maximum size of a decoded message.
|
|
||||||
///
|
|
||||||
/// Default: `4MB`
|
|
||||||
#[must_use]
|
|
||||||
pub fn max_decoding_message_size(mut self, limit: usize) -> Self {
|
|
||||||
self.inner = self.inner.max_decoding_message_size(limit);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// Limits the maximum size of an encoded message.
|
|
||||||
///
|
|
||||||
/// Default: `usize::MAX`
|
|
||||||
#[must_use]
|
|
||||||
pub fn max_encoding_message_size(mut self, limit: usize) -> Self {
|
|
||||||
self.inner = self.inner.max_encoding_message_size(limit);
|
|
||||||
self
|
|
||||||
}
|
|
||||||
/// / Model Info
|
|
||||||
pub async fn info(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::InfoRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::InfoResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v3.TextGenerationService/Info");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut()
|
|
||||||
.insert(GrpcMethod::new("generate.v3.TextGenerationService", "Info"));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Service discovery
|
|
||||||
pub async fn service_discovery(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::ServiceDiscoveryRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::ServiceDiscoveryResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v3.TextGenerationService/ServiceDiscovery",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"ServiceDiscovery",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Empties batch cache
|
|
||||||
pub async fn clear_cache(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::ClearCacheRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::ClearCacheResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v3.TextGenerationService/ClearCache",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"ClearCache",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Remove requests from a cached batch
|
|
||||||
pub async fn filter_batch(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::FilterBatchRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::FilterBatchResponse>, tonic::Status>
|
|
||||||
{
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path = http::uri::PathAndQuery::from_static(
|
|
||||||
"/generate.v3.TextGenerationService/FilterBatch",
|
|
||||||
);
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"FilterBatch",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Warmup the model and compute max cache size
|
|
||||||
pub async fn warmup(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::WarmupRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::WarmupResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v3.TextGenerationService/Warmup");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"Warmup",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Prefill batch and decode first token
|
|
||||||
pub async fn prefill(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::PrefillRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::PrefillResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v3.TextGenerationService/Prefill");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"Prefill",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Decode token for a list of prefilled batches
|
|
||||||
pub async fn decode(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::DecodeRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::DecodeResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v3.TextGenerationService/Decode");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"Decode",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
/// / Health check
|
|
||||||
pub async fn health(
|
|
||||||
&mut self,
|
|
||||||
request: impl tonic::IntoRequest<super::HealthRequest>,
|
|
||||||
) -> std::result::Result<tonic::Response<super::HealthResponse>, tonic::Status> {
|
|
||||||
self.inner.ready().await.map_err(|e| {
|
|
||||||
tonic::Status::new(
|
|
||||||
tonic::Code::Unknown,
|
|
||||||
format!("Service was not ready: {}", e.into()),
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let codec = tonic::codec::ProstCodec::default();
|
|
||||||
let path =
|
|
||||||
http::uri::PathAndQuery::from_static("/generate.v3.TextGenerationService/Health");
|
|
||||||
let mut req = request.into_request();
|
|
||||||
req.extensions_mut().insert(GrpcMethod::new(
|
|
||||||
"generate.v3.TextGenerationService",
|
|
||||||
"Health",
|
|
||||||
));
|
|
||||||
self.inner.unary(req, path, codec).await
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
// This file is @generated by prost-build.
|
|
||||||
pub mod generate {
|
|
||||||
pub mod v3 {
|
|
||||||
include!("generate.v3.rs");
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user