mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
feat: avoid unwrap and pre allocate future vec
This commit is contained in:
parent
b5dd58f73b
commit
ed8c7726ba
@ -1407,10 +1407,9 @@ async fn vertex_compatibility(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Prepare futures for all instances
|
// Prepare futures for all instances
|
||||||
let futures: Vec<_> = req
|
let mut futures = Vec::with_capacity(req.instances.len());
|
||||||
.instances
|
|
||||||
.iter()
|
for instance in req.instances.iter() {
|
||||||
.map(|instance| {
|
|
||||||
let generate_request = match instance {
|
let generate_request = match instance {
|
||||||
VertexInstance::Generate(instance) => GenerateRequest {
|
VertexInstance::Generate(instance) => GenerateRequest {
|
||||||
inputs: instance.inputs.clone(),
|
inputs: instance.inputs.clone(),
|
||||||
@ -1456,7 +1455,7 @@ async fn vertex_compatibility(
|
|||||||
Some(temperature) if temperature == 0.0 => (false, None),
|
Some(temperature) if temperature == 0.0 => (false, None),
|
||||||
other => (true, other),
|
other => (true, other),
|
||||||
};
|
};
|
||||||
let (inputs, grammar, _using_tools) = prepare_chat_input(
|
let (inputs, grammar, _using_tools) = match prepare_chat_input(
|
||||||
&infer,
|
&infer,
|
||||||
response_format,
|
response_format,
|
||||||
tools,
|
tools,
|
||||||
@ -1464,10 +1463,19 @@ async fn vertex_compatibility(
|
|||||||
&tool_prompt,
|
&tool_prompt,
|
||||||
guideline,
|
guideline,
|
||||||
messages,
|
messages,
|
||||||
)
|
) {
|
||||||
.unwrap();
|
Ok(result) => result,
|
||||||
|
Err(e) => {
|
||||||
|
return Err((
|
||||||
|
StatusCode::BAD_REQUEST,
|
||||||
|
Json(ErrorResponse {
|
||||||
|
error: format!("Failed to prepare chat input: {}", e),
|
||||||
|
error_type: "Input preparation error".to_string(),
|
||||||
|
}),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// build the request passing some parameters
|
|
||||||
GenerateRequest {
|
GenerateRequest {
|
||||||
inputs: inputs.to_string(),
|
inputs: inputs.to_string(),
|
||||||
add_special_tokens: false,
|
add_special_tokens: false,
|
||||||
@ -1500,7 +1508,7 @@ async fn vertex_compatibility(
|
|||||||
let compute_type_clone = compute_type.clone();
|
let compute_type_clone = compute_type.clone();
|
||||||
let span_clone = span.clone();
|
let span_clone = span.clone();
|
||||||
|
|
||||||
async move {
|
futures.push(async move {
|
||||||
generate_internal(
|
generate_internal(
|
||||||
Extension(infer_clone),
|
Extension(infer_clone),
|
||||||
compute_type_clone,
|
compute_type_clone,
|
||||||
@ -1518,9 +1526,8 @@ async fn vertex_compatibility(
|
|||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
|
});
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
// execute all futures in parallel, collect results, returning early if any error occurs
|
// execute all futures in parallel, collect results, returning early if any error occurs
|
||||||
let results = futures::future::join_all(futures).await;
|
let results = futures::future::join_all(futures).await;
|
||||||
|
Loading…
Reference in New Issue
Block a user