diff --git a/Dockerfile b/Dockerfile index 029682db..d9a31af7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -25,7 +25,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ rm -f $PROTOC_ZIP COPY --from=planner /usr/src/recipe.json recipe.json -RUN cargo chef cook --release --recipe-path recipe.json +RUN cargo chef cook --profile release-opt --recipe-path recipe.json COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml @@ -226,11 +226,11 @@ RUN cd server && \ pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir # Install benchmarker -COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark +COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # Install router -COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router +COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # Install launcher -COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher +COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ build-essential \ diff --git a/Dockerfile_amd b/Dockerfile_amd index 857943bd..b0d181ea 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -25,7 +25,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ rm -f $PROTOC_ZIP COPY --from=planner /usr/src/recipe.json recipe.json -RUN cargo chef cook --release --recipe-path recipe.json +RUN cargo chef cook --profile release-opt --recipe-path recipe.json COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml @@ -193,11 +193,11 @@ RUN cd server && \ pip install ".[accelerate, peft, outlines]" --no-cache-dir # Install benchmarker -COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark +COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # Install router -COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router +COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # Install launcher -COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher +COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher # AWS Sagemaker compatible image FROM base as sagemaker diff --git a/Dockerfile_intel b/Dockerfile_intel index 1ae2787a..0a700003 100644 --- a/Dockerfile_intel +++ b/Dockerfile_intel @@ -24,7 +24,7 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ rm -f $PROTOC_ZIP COPY --from=planner /usr/src/recipe.json recipe.json -RUN cargo chef cook --release --recipe-path recipe.json +RUN cargo chef cook --profile release-opt --recipe-path recipe.json COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml @@ -78,11 +78,11 @@ ENV PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mp ENV CCL_ZE_IPC_EXCHANGE=sockets # Install benchmarker -COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark +COPY --from=builder /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # Install router -COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router +COPY --from=builder /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # Install launcher -COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher +COPY --from=builder /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher # Final image FROM base diff --git a/router/src/infer/v3/queue.rs b/router/src/infer/v3/queue.rs index d45c806a..19e0a23e 100644 --- a/router/src/infer/v3/queue.rs +++ b/router/src/infer/v3/queue.rs @@ -14,7 +14,7 @@ use text_generation_client::ChunksToString; use text_generation_client::Input; use tokio::sync::{mpsc, oneshot}; use tokio::time::Instant; -use tracing::{info_span, instrument, Span}; +use tracing::{info_span, instrument, Instrument, Span}; /// Queue entry #[derive(Debug)] @@ -136,9 +136,9 @@ async fn queue_task( response_sender, span, } => { - let _parent_span = span.enter(); let next_batch = state .next_batch(min_size, max_size, prefill_token_budget, token_budget) + .instrument(span) .await; response_sender.send(next_batch).unwrap(); metrics::gauge!("tgi_queue_size", state.entries.len() as f64); @@ -180,14 +180,8 @@ impl State { speculate: u32, max_batch_total_tokens: u32, ) -> Self { - let block_allocator = match requires_padding { - true => None, - false => Some(BlockAllocator::new( - max_batch_total_tokens, - block_size, - window_size, - )), - }; + let block_allocator = requires_padding + .then(|| BlockAllocator::new(max_batch_total_tokens, block_size, window_size)); Self { entries: VecDeque::with_capacity(128),