diff --git a/router/src/server.rs b/router/src/server.rs
index 16dd87bc..7a191d61 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -382,7 +382,6 @@ async fn generate_stream(
             yield Ok(Event::from(err));
         } else {
             match infer.generate_stream(req).instrument(info_span!(parent: &span, "async_stream")).await {
-                let top_n_tokens = req.parameters.top_n_tokens;
                 // Keep permit as long as generate_stream lives
                 Ok((_permit, mut response_stream)) => {
                     // Server-Sent Event stream
diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py
index 06229f35..806e9833 100644
--- a/server/text_generation_server/models/model.py
+++ b/server/text_generation_server/models/model.py
@@ -6,7 +6,6 @@ from typing import List, Tuple, Optional, TypeVar, Type
 from transformers import PreTrainedTokenizerBase, PretrainedConfig
 
 from text_generation_server.models.types import Batch, Generation
->>>>>>> 8471e18 (Defer building top-token objects to Rust)
 from text_generation_server.pb.generate_pb2 import InfoResponse
 
 B = TypeVar("B", bound=Batch)