mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
feat: emit params in logs for each request
parent 593c443b45
commit bd28c36815
@@ -657,6 +657,7 @@ async fn completions(
                 format!("{}-{}", info.version, info.docker_label.unwrap_or("native"));
             let infer_clone = infer.clone();
             let compute_type_clone = compute_type.clone();
+            let params_clone = generate_request.parameters.clone();

             // Create a future for each generate_stream_internal call.
             let generate_future = async move {
@@ -690,7 +691,8 @@ async fn completions(
                 let (header_tx, header_rx) = oneshot::channel();
                 let (sse_tx, sse_rx) = tokio::sync::mpsc::unbounded_channel();

-                tokio::spawn(async move {
+                tokio::spawn(
+                    async move {
                         let (header_map, sse) = generate_stream_internal(
                             infer_clone.clone(),
                             compute_type_clone.clone(),
@@ -710,7 +712,11 @@ async fn completions(
                                 break;
                             }
                         }
-                });
+                    }
+                    .instrument(
+                        tracing::info_span!("request", index = %index, parameters = ?params_clone),
+                    ),
+                );

                 (header_rx, sse_rx)
             };
@@ -796,6 +802,7 @@ async fn completions(
         for (index, generate_request) in generate_requests.into_iter().enumerate() {
             let infer_clone = infer.clone();
             let compute_type_clone = compute_type.clone();
+            let params_clone = generate_request.parameters.clone();
             let response_future = async move {
                 let result = generate(
                     Extension(infer_clone),
@@ -804,7 +811,8 @@ async fn completions(
                 )
                 .await;
                 result.map(|(headers, generation)| (index, headers, generation))
-            };
+            }
+            .instrument(tracing::info_span!("request", index = %index, parameters = ?params_clone));
             responses.push(response_future);
         }
         let generate_responses = responses.try_collect::<Vec<_>>().await?;
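The pattern in both hunks is the same: clone the request parameters before they are moved into the async block, then attach a tracing span to the future with `.instrument(...)` so every log event emitted while that future is polled carries the request index and a Debug rendering of the parameters. Below is a minimal, self-contained sketch of that pattern, assuming only the `tokio`, `tracing`, and `tracing-subscriber` crates; the `Params` struct and its fields are hypothetical stand-ins, not TGI's `GenerateParameters`.

use tracing::{info, info_span, Instrument};

// Hypothetical stand-in for the per-request generation parameters.
#[derive(Debug, Clone)]
struct Params {
    max_new_tokens: u32,
    temperature: f32,
}

#[tokio::main]
async fn main() {
    // Log to stdout so the span fields are visible on each event.
    tracing_subscriber::fmt::init();

    let requests = vec![
        Params { max_new_tokens: 16, temperature: 0.7 },
        Params { max_new_tokens: 32, temperature: 1.0 },
    ];

    let mut handles = Vec::new();
    for (index, params) in requests.into_iter().enumerate() {
        let params_clone = params.clone();
        handles.push(tokio::spawn(
            async move {
                // Inherits the span below, so this line is logged with
                // index=... and parameters=Params { ... } attached.
                info!("handling completion sub-request");
            }
            // Same shape as the diff: instrument the future, not the task handle.
            .instrument(info_span!("request", index = %index, parameters = ?params_clone)),
        ));
    }
    for handle in handles {
        handle.await.unwrap();
    }
}

Attaching the span with `.instrument` means it is re-entered on every poll of the future, so the fields stay associated with log events across await points; that is why the span is applied to the future rather than entered inside it.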