Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)
Upgrading our rustc version. (#2908)
* Upgrading our rustc version.
* Fixing the rust tests to the proper version.
* Clippy everything.
This commit is contained in:
parent 46994b34fb
commit 203cade244
.github/workflows/tests.yaml (vendored): 2 lines changed
@@ -31,7 +31,7 @@ jobs:
         with:
           # Released on: 02 May, 2024
           # https://releases.rs/docs/1.78.0/
-          toolchain: 1.80.0
+          toolchain: 1.84.0
           override: true
           components: rustfmt, clippy
     - name: Install Protoc
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -1,6 +1,6 @@
 ARG PLATFORM=xpu

-FROM lukemathwalker/cargo-chef:latest-rust-1.80.1 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src

 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -2,7 +2,7 @@ ARG CUDA_ARCH_LIST="75-real;80-real;86-real;89-real;90-real"
 ARG OMPI_VERSION="4.1.7rc1"

 # Build dependencies resolver stage
-FROM lukemathwalker/cargo-chef:latest AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.84.0 AS chef
 WORKDIR /usr/src/text-generation-inference/backends/trtllm

 FROM chef AS planner
@@ -8,7 +8,7 @@ use tracing_opentelemetry::OpenTelemetrySpanExt;
 /// Inject context in the metadata of a gRPC request.
 struct MetadataInjector<'a>(pub &'a mut tonic::metadata::MetadataMap);

-impl<'a> Injector for MetadataInjector<'a> {
+impl Injector for MetadataInjector<'_> {
     /// Set a key and value in the MetadataMap. Does nothing if the key or value are not valid inputs
     fn set(&mut self, key: &str, value: String) {
         if let Ok(key) = tonic::metadata::MetadataKey::from_bytes(key.as_bytes()) {
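The hunk above is a representative clippy fix from the upgrade: an impl header that names a lifetime it never otherwise uses can elide it with `'_`. Below is a minimal sketch of the same pattern, using a toy trait and type in place of opentelemetry's `Injector` and tonic's `MetadataMap` (only those two names come from the diff; everything else is illustrative):

```rust
/// Toy stand-in for a metadata map.
struct KvStore {
    entries: Vec<(String, String)>,
}

/// Toy stand-in for opentelemetry's Injector trait.
trait Setter {
    fn set(&mut self, key: &str, value: String);
}

struct KvInjector<'a>(&'a mut KvStore);

// Before the upgrade this would have been written
// `impl<'a> Setter for KvInjector<'a>`; the named lifetime is never used
// inside the block, so the newer clippy suggests the anonymous `'_` form.
impl Setter for KvInjector<'_> {
    fn set(&mut self, key: &str, value: String) {
        self.0.entries.push((key.to_owned(), value));
    }
}

fn main() {
    let mut store = KvStore { entries: Vec::new() };
    KvInjector(&mut store).set("traceparent", "00-abc-def-01".to_string());
    println!("{:?}", store.entries);
}
```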
@@ -213,8 +213,7 @@ impl State {
         }

         // Pad prefill_token_budget to be a multiple of block size
-        let prefill_token_budget =
-            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
+        let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size;

         // Create span for this batch to add context to inference calls
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
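This hunk, and several of the ones below, replace the hand-written `(x + block_size - 1) / block_size` ceiling division with `div_ceil`, which has been stable on unsigned integers since Rust 1.73 and is what the newer clippy nudges toward. A small self-contained sketch (function names and numbers are made up for illustration) showing that the two forms agree, and that `div_ceil` also sidesteps the overflow the manual form can hit near `u32::MAX`:

```rust
// Round `tokens` up to the next multiple of `block_size` (block_size > 0).
fn pad_to_block_size(tokens: u32, block_size: u32) -> u32 {
    tokens.div_ceil(block_size) * block_size
}

// The pre-upgrade spelling of the same computation.
fn pad_to_block_size_manual(tokens: u32, block_size: u32) -> u32 {
    ((tokens + block_size - 1) / block_size) * block_size
}

fn main() {
    let block_size = 16;
    for tokens in [1u32, 15, 16, 17, 250] {
        assert_eq!(
            pad_to_block_size(tokens, block_size),
            pad_to_block_size_manual(tokens, block_size)
        );
        println!("{tokens} tokens -> {} padded", pad_to_block_size(tokens, block_size));
    }
}
```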
@@ -245,9 +244,8 @@ impl State {
                 prefill_tokens = (batch_requests.len() + 1) as u32 * max_input_length
             } else {
                 // pad to block size
-                prefill_tokens += ((entry.request.input_length + self.block_size - 1)
-                    / self.block_size)
-                    * self.block_size;
+                prefill_tokens +=
+                    entry.request.input_length.div_ceil(self.block_size) * self.block_size;
             }

             if self.requires_padding {
@@ -262,8 +260,7 @@ impl State {
             };

             // pad to block size
-            decode_tokens +=
-                ((max_new_tokens + self.block_size - 1) / self.block_size) * self.block_size;
+            decode_tokens += max_new_tokens.div_ceil(self.block_size) * self.block_size;
         }

         if prefill_tokens > prefill_token_budget
@@ -165,13 +165,13 @@ impl Allocator for SimpleAllocator {
             let (tokens, repeats) = match self.window_size {
                 None => (tokens, 1),
                 Some(window_size) => {
-                    let repeats = (tokens + window_size - 1) / window_size;
+                    let repeats = tokens.div_ceil(window_size);
                     let tokens = core::cmp::min(tokens, window_size);
                     (tokens, repeats as usize)
                 }
             };
             // Pad to a multiple of block size
-            let required_blocks = (tokens + self.block_size - 1) / self.block_size;
+            let required_blocks = tokens.div_ceil(self.block_size);
             (required_blocks, repeats)
         };

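For context, the SimpleAllocator hunk above computes how many cache blocks to reserve and how many times a sliding attention window repeats. A standalone sketch of that calculation, with the `window_size`/`block_size` semantics inferred from the diff rather than copied from the file:

```rust
/// (blocks, repeats) needed for `tokens`, given a block size and an optional
/// sliding attention window; mirrors the arithmetic in the hunk above.
fn blocks_needed(tokens: u32, block_size: u32, window_size: Option<u32>) -> (u32, usize) {
    let (tokens, repeats) = match window_size {
        // No sliding window: a single allocation covering all tokens.
        None => (tokens, 1),
        // Sliding window: allocate one window's worth of blocks, repeated.
        Some(window_size) => {
            let repeats = tokens.div_ceil(window_size);
            let tokens = core::cmp::min(tokens, window_size);
            (tokens, repeats as usize)
        }
    };
    // Pad to a multiple of block size.
    (tokens.div_ceil(block_size), repeats)
}

fn main() {
    // 300 tokens, block size 16, window of 128: 3 windows of 8 blocks each.
    assert_eq!(blocks_needed(300, 16, Some(128)), (8, 3));
    // No window: ceil(300 / 16) = 19 blocks, used once.
    assert_eq!(blocks_needed(300, 16, None), (19, 1));
}
```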
@@ -257,8 +257,7 @@ impl State {
         }

         // Pad prefill_token_budget to be a multiple of block size
-        let prefill_token_budget =
-            ((prefill_token_budget + self.block_size - 1) / self.block_size) * self.block_size;
+        let prefill_token_budget = prefill_token_budget.div_ceil(self.block_size) * self.block_size;

         // Create span for this batch to add context to inference calls
         let next_batch_span = info_span!(parent: None, "batch", batch_size = tracing::field::Empty);
@@ -103,7 +103,7 @@ impl Allocator for RadixAllocator {
         let prefix_len = blocks.len() * self.block_size as usize;
         let suffix_len = tokens - prefix_len as u32;

-        let suffix_blocks = (suffix_len + self.block_size - 1) / self.block_size;
+        let suffix_blocks = suffix_len.div_ceil(self.block_size);

         tracing::info!("Prefix {prefix_len} - Suffix {suffix_len}");

flake.lock: 12 lines changed
@@ -108,11 +108,11 @@
         "pre-commit-hooks": "pre-commit-hooks_3"
       },
       "locked": {
-        "lastModified": 1732039290,
-        "narHash": "sha256-LQKY7bShf2H9kJouxa9ZspfdrulnZF9o4kLTqGqCDYM=",
+        "lastModified": 1734429562,
+        "narHash": "sha256-V2XNs3Ir8WXNHdocfzkR/fu0FzkZ9uTDJkVecxJrGmQ=",
         "owner": "nix-community",
         "repo": "crate2nix",
-        "rev": "9ff208ce7f5a482272b1bcefbe363c772d7ff914",
+        "rev": "8537c2d7cb623679aaeff62c4c4c43a91566ab09",
         "type": "github"
       },
       "original": {
@@ -853,11 +853,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1732242723,
-        "narHash": "sha256-NWI8csIK0ujFlFuEXKnoc+7hWoCiEtINK9r48LUUMeU=",
+        "lastModified": 1736907983,
+        "narHash": "sha256-fw55wVwpJW36Md2HZBKuxX3YHGeqsGsspPLtCMVr1Y8=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "a229311fcb45b88a95fdfa5cecd8349c809a272a",
+        "rev": "eaa365c911441e07e387ff6acc596619fc50b156",
         "type": "github"
       },
       "original": {
@@ -5,7 +5,6 @@ use hf_hub::{
 };
 use nix::sys::signal::{self, Signal};
 use nix::unistd::Pid;
-use regex::Regex;
 use serde::Deserialize;
 use std::env;
 use std::ffi::OsString;
@@ -2176,11 +2175,12 @@ fn main() -> Result<(), LauncherError> {
                 }

                 // capture adapter_id, path, revision in format of adapter_id=path@revision
-                let re = Regex::new(r"^([^=@]+)(?:=([^@]+))?(?:@(.+))?$").unwrap();
-                if let Some(caps) = re.captures(adapter) {
-                    let adapter_id = caps.get(1).map_or("", |m| m.as_str());
-                    let revision = caps.get(3).map(|m| m.as_str());
+                // path is disabled beforehand.
+                let mut splits = adapter.split("@");
+                let adapter_id = splits.next().ok_or_else(|| {
+                    LauncherError::ArgumentValidation("Missing adapter id".to_string())
+                })?;
+                let revision = splits.next();
                 download_convert_model(
                     adapter_id,
                     revision,
@@ -2190,12 +2190,6 @@ fn main() -> Result<(), LauncherError> {
                     running.clone(),
                     false, // avoid merging lora adapters if using multi-lora
                 )?;
-                } else {
-                    return Err(LauncherError::ArgumentValidation(format!(
-                        "Invalid LoRA adapter format: {}",
-                        adapter
-                    )));
-                }
             }
         }

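The two launcher hunks above drop the regex-based parsing of `adapter_id=path@revision` in favor of a plain split on `@` (the `=path` form is rejected earlier, per the new comment), which also removes the need for the invalid-format error branch. A hedged sketch of the resulting behavior, with a plain `String` error standing in for `LauncherError::ArgumentValidation`:

```rust
/// Parse "adapter_id[@revision]"; names and error type here are illustrative.
fn parse_adapter(adapter: &str) -> Result<(&str, Option<&str>), String> {
    let mut splits = adapter.split('@');
    let adapter_id = splits
        .next()
        .ok_or_else(|| "Missing adapter id".to_string())?;
    let revision = splits.next();
    Ok((adapter_id, revision))
}

fn main() {
    assert_eq!(
        parse_adapter("org/my-lora@main"),
        Ok(("org/my-lora", Some("main")))
    );
    assert_eq!(parse_adapter("org/my-lora"), Ok(("org/my-lora", None)));
}
```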
@@ -79,7 +79,7 @@ impl TokenizerTrait for tokenizers::Tokenizer {
     }
 }

-impl<'a> TokenizerTrait for PyTokenizer<'a> {
+impl TokenizerTrait for PyTokenizer<'_> {
     fn encode_trait(
         &self,
         query: String,
@@ -1229,12 +1229,11 @@ mod tests {
         assert!(
             chunks
                 == vec![
-                    Chunk::Text("test".to_string()).into(),
+                    Chunk::Text("test".to_string()),
                     Chunk::Image(Image {
                         data: pixel_data.clone(),
                         mimetype: "image/gif".to_string()
                     })
-                    .into()
                 ],
             "Failed to process images",
         );
@@ -1289,17 +1288,15 @@ mod tests {
         assert!(
             chunks
                 == vec![
-                    Chunk::Text("test".to_string()).into(),
+                    Chunk::Text("test".to_string()),
+                    Chunk::Image(Image {
+                        data: pixel_data.clone(),
+                        mimetype: "image/gif".to_string()
+                    }),
                     Chunk::Image(Image {
                         data: pixel_data.clone(),
                         mimetype: "image/gif".to_string()
                     })
-                    .into(),
-                    Chunk::Image(Image {
-                        data: pixel_data.clone(),
-                        mimetype: "image/gif".to_string()
-                    })
-                    .into()
                 ],
             "Failed to process images",
         );
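The two test hunks above drop `.into()` calls on values that already have the target element type, so the conversions were identity calls; this is the kind of pattern clippy's `useless_conversion` lint flags, which is presumably what fired on the newer toolchain. A tiny illustration with a stand-in `Chunk` enum:

```rust
#[derive(Debug, PartialEq)]
enum Chunk {
    Text(String),
}

fn main() {
    let chunks = vec![Chunk::Text("test".to_string())];
    // Before: `Chunk::Text("test".to_string()).into()` converted a Chunk into
    // a Chunk, an identity call. After: the plain constructor is enough.
    assert_eq!(chunks, vec![Chunk::Text("test".to_string())]);
}
```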
@@ -1,5 +1,5 @@
 [toolchain]
 # Released on: June 13, 2024
 # https://releases.rs/docs/1.79.0/
-channel = "1.80.1"
+channel = "1.84.0"
 components = ["rustfmt", "clippy"]