mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
fix dockerfiles
This commit is contained in:
parent
9410a79df4
commit
eb6a02a0f1
@ -25,7 +25,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
|||||||
rm -f $PROTOC_ZIP
|
rm -f $PROTOC_ZIP
|
||||||
|
|
||||||
COPY --from=planner /usr/src/recipe.json recipe.json
|
COPY --from=planner /usr/src/recipe.json recipe.json
|
||||||
RUN cargo chef cook --release --recipe-path recipe.json
|
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
|
||||||
|
|
||||||
COPY Cargo.toml Cargo.toml
|
COPY Cargo.toml Cargo.toml
|
||||||
COPY rust-toolchain.toml rust-toolchain.toml
|
COPY rust-toolchain.toml rust-toolchain.toml
|
||||||
@ -226,11 +226,11 @@ RUN cd server && \
|
|||||||
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
|
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
# Install router
|
# Install router
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
||||||
# Install launcher
|
# Install launcher
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||||
|
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
build-essential \
|
build-essential \
|
||||||
|
@ -25,7 +25,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
|||||||
rm -f $PROTOC_ZIP
|
rm -f $PROTOC_ZIP
|
||||||
|
|
||||||
COPY --from=planner /usr/src/recipe.json recipe.json
|
COPY --from=planner /usr/src/recipe.json recipe.json
|
||||||
RUN cargo chef cook --release --recipe-path recipe.json
|
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
|
||||||
|
|
||||||
COPY Cargo.toml Cargo.toml
|
COPY Cargo.toml Cargo.toml
|
||||||
COPY rust-toolchain.toml rust-toolchain.toml
|
COPY rust-toolchain.toml rust-toolchain.toml
|
||||||
@ -193,11 +193,11 @@ RUN cd server && \
|
|||||||
pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
pip install ".[accelerate, peft, outlines]" --no-cache-dir
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
# Install router
|
# Install router
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
||||||
# Install launcher
|
# Install launcher
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||||
|
|
||||||
# AWS Sagemaker compatible image
|
# AWS Sagemaker compatible image
|
||||||
FROM base as sagemaker
|
FROM base as sagemaker
|
||||||
|
@ -24,7 +24,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
|||||||
rm -f $PROTOC_ZIP
|
rm -f $PROTOC_ZIP
|
||||||
|
|
||||||
COPY --from=planner /usr/src/recipe.json recipe.json
|
COPY --from=planner /usr/src/recipe.json recipe.json
|
||||||
RUN cargo chef cook --release --recipe-path recipe.json
|
RUN cargo chef cook --profile release-opt --recipe-path recipe.json
|
||||||
|
|
||||||
COPY Cargo.toml Cargo.toml
|
COPY Cargo.toml Cargo.toml
|
||||||
COPY rust-toolchain.toml rust-toolchain.toml
|
COPY rust-toolchain.toml rust-toolchain.toml
|
||||||
@ -78,11 +78,11 @@ ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mp
|
|||||||
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
||||||
|
|
||||||
# Install benchmarker
|
# Install benchmarker
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark
|
||||||
# Install router
|
# Install router
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
|
COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router
|
||||||
# Install launcher
|
# Install launcher
|
||||||
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
|
COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||||
|
|
||||||
# Final image
|
# Final image
|
||||||
FROM base
|
FROM base
|
||||||
|
@ -14,7 +14,7 @@ use text_generation_client::ChunksToString;
|
|||||||
use text_generation_client::Input;
|
use text_generation_client::Input;
|
||||||
use tokio::sync::{mpsc, oneshot};
|
use tokio::sync::{mpsc, oneshot};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
use tracing::{info_span, instrument, Span};
|
use tracing::{info_span, instrument, Instrument, Span};
|
||||||
|
|
||||||
/// Queue entry
|
/// Queue entry
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -136,9 +136,9 @@ async fn queue_task(
|
|||||||
response_sender,
|
response_sender,
|
||||||
span,
|
span,
|
||||||
} => {
|
} => {
|
||||||
let _parent_span = span.enter();
|
|
||||||
let next_batch = state
|
let next_batch = state
|
||||||
.next_batch(min_size, max_size, prefill_token_budget, token_budget)
|
.next_batch(min_size, max_size, prefill_token_budget, token_budget)
|
||||||
|
.instrument(span)
|
||||||
.await;
|
.await;
|
||||||
response_sender.send(next_batch).unwrap();
|
response_sender.send(next_batch).unwrap();
|
||||||
metrics::gauge!("tgi_queue_size", state.entries.len() as f64);
|
metrics::gauge!("tgi_queue_size", state.entries.len() as f64);
|
||||||
@ -180,14 +180,8 @@ impl State {
|
|||||||
speculate: u32,
|
speculate: u32,
|
||||||
max_batch_total_tokens: u32,
|
max_batch_total_tokens: u32,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let block_allocator = match requires_padding {
|
let block_allocator = requires_padding
|
||||||
true => None,
|
.then(|| BlockAllocator::new(max_batch_total_tokens, block_size, window_size));
|
||||||
false => Some(BlockAllocator::new(
|
|
||||||
max_batch_total_tokens,
|
|
||||||
block_size,
|
|
||||||
window_size,
|
|
||||||
)),
|
|
||||||
};
|
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
entries: VecDeque::with_capacity(128),
|
entries: VecDeque::with_capacity(128),
|
||||||
|
Loading…
Reference in New Issue
Block a user