mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 00:12:08 +00:00
Set maximum gRPC message receive size to 2GiB (#2075)
* Set maximum gRPC message receive size to 2GiB. The previous default was 4MiB, which doesn't really work well for multi-modal models.
* Update to Rust 1.79.0
* Fix up formatting to make the PR pass
This commit is contained in:
parent
b3dadbde06
commit
6b2cbd0169
@ -1,5 +1,5 @@
|
||||
# Rust builder
|
||||
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
||||
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
|
||||
WORKDIR /usr/src
|
||||
|
||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
|
||||
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
|
||||
WORKDIR /usr/src
|
||||
|
||||
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
|
||||
|
@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
|
||||
"Lowest: {:.2} {unit}",
|
||||
data.iter()
|
||||
.min_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN)
|
||||
.unwrap_or(&f64::NAN)
|
||||
),
|
||||
Style::default().fg(Color::Reset),
|
||||
)]),
|
||||
@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
|
||||
"Highest: {:.2} {unit}",
|
||||
data.iter()
|
||||
.max_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN)
|
||||
.unwrap_or(&f64::NAN)
|
||||
),
|
||||
Style::default().fg(Color::Reset),
|
||||
)]),
|
||||
@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
|
||||
let min_latency: f64 = *latency_iter
|
||||
.clone()
|
||||
.min_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
let max_latency: f64 = *latency_iter
|
||||
.max_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
let min_throughput: f64 = *throughput_iter
|
||||
.clone()
|
||||
.min_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
let max_throughput: f64 = *throughput_iter
|
||||
.max_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
|
||||
// Chart min max values
|
||||
let min_x = if zoom {
|
||||
|
@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
|
||||
let min = data
|
||||
.iter()
|
||||
.min_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
let max = data
|
||||
.iter()
|
||||
.max_by(|a, b| a.total_cmp(b))
|
||||
.unwrap_or(&std::f64::NAN);
|
||||
.unwrap_or(&f64::NAN);
|
||||
(average, *min, *max)
|
||||
}
|
||||
|
||||
fn px(data: &[f64], p: u32) -> f64 {
|
||||
let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
|
||||
*data.get(i).unwrap_or(&std::f64::NAN)
|
||||
*data.get(i).unwrap_or(&f64::NAN)
|
||||
}
|
||||
|
||||
fn format_value(value: f64, unit: &'static str) -> String {
|
||||
|
@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f
|
||||
.iter()
|
||||
.map(|&p| {
|
||||
let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
|
||||
(format!("p{p}"), *values.get(i).unwrap_or(&std::f64::NAN))
|
||||
(format!("p{p}"), *values.get(i).unwrap_or(&f64::NAN))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
[toolchain]
|
||||
# Released on: 02 May, 2024
|
||||
# https://releases.rs/docs/1.78.0/
|
||||
channel = "1.78.0"
|
||||
# Released on: June 13, 2024
|
||||
# https://releases.rs/docs/1.79.0/
|
||||
channel = "1.79.0"
|
||||
components = ["rustfmt", "clippy"]
|
||||
|
@ -240,7 +240,11 @@ def serve(
|
||||
interceptors=[
|
||||
ExceptionInterceptor(),
|
||||
UDSOpenTelemetryAioServerInterceptor(),
|
||||
]
|
||||
],
|
||||
options=[
|
||||
# Set the maximum possible message length: i32::MAX
|
||||
("grpc.max_receive_message_length", (1 << 31) - 1)
|
||||
],
|
||||
)
|
||||
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
||||
TextGenerationService(model, Cache(), quantize, server_urls), server
|
||||
|
Loading…
Reference in New Issue
Block a user