mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 20:12:07 +00:00
chore: bump rust version and annotate/fix all clippy warnings (#1455)
This PR just bumps Rust to the latest version and makes clippy happy:

```bash
cargo clippy --all -- -D warnings
# Finished dev [unoptimized + debuginfo] target(s) in 0.10s
```
This commit is contained in:
parent
77afb882dc
commit
935ee00749
@ -466,7 +466,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
|
||||
let latency_percentiles = crate::utils::percentiles(latency, &[50, 90, 99]);
|
||||
|
||||
// Latency p50/p90/p99 texts
|
||||
let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
|
||||
let colors = [Color::LightGreen, Color::LightYellow, Color::LightRed];
|
||||
for (i, (name, value)) in latency_percentiles.iter().enumerate() {
|
||||
let span = Line::from(vec![Span::styled(
|
||||
format!("{name}: {value:.2} ms"),
|
||||
|
@ -55,6 +55,8 @@ impl std::fmt::Display for Quantization {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// To keep in track with `server`.
|
||||
match self {
|
||||
#[allow(deprecated)]
|
||||
// Use `eetq` instead, which provides better latencies overall and is drop-in in most cases
|
||||
Quantization::Bitsandbytes => {
|
||||
write!(f, "bitsandbytes")
|
||||
}
|
||||
|
@ -365,7 +365,7 @@ pub struct DecodeTimings {
|
||||
impl DecodeTimings {
|
||||
fn new(concat_ns: Option<u64>, forward_ns: u64, decode_ns: u64, total_ns: u64) -> Self {
|
||||
Self {
|
||||
concat: concat_ns.map(|v| Duration::from_nanos(v)),
|
||||
concat: concat_ns.map(Duration::from_nanos),
|
||||
forward: Duration::from_nanos(forward_ns),
|
||||
decode: Duration::from_nanos(decode_ns),
|
||||
total: Duration::from_nanos(total_ns),
|
||||
|
@ -130,6 +130,7 @@ impl ShardedClient {
|
||||
.iter_mut()
|
||||
.map(|client| Box::pin(client.prefill(batch.clone())))
|
||||
.collect();
|
||||
#[allow(clippy::type_complexity)]
|
||||
let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>, PrefillTimings)>> =
|
||||
join_all(futures).await.into_iter().collect();
|
||||
let mut results = results?;
|
||||
@ -162,6 +163,7 @@ impl ShardedClient {
|
||||
.iter_mut()
|
||||
.map(|client| Box::pin(client.decode(batches.clone())))
|
||||
.collect();
|
||||
#[allow(clippy::type_complexity)]
|
||||
let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>, DecodeTimings)>> =
|
||||
join_all(futures).await.into_iter().collect();
|
||||
let mut results = results?;
|
||||
|
Loading…
Reference in New Issue
Block a user