mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Add simple resolution when user specifies ATTENTION=paged.
This commit is contained in:
parent
5c72f269b6
commit
ff36b2fb39
@@ -124,6 +124,10 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if attention == Some("paged".to_string()) && prefix_caching.is_none() {
|
||||||
|
tracing::info!("Disabling prefix caching on paged attention");
|
||||||
|
prefix_caching = Some("0".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
let attention = attention.unwrap_or("flashinfer".to_string());
|
let attention = attention.unwrap_or("flashinfer".to_string());
|
||||||
let prefix_caching = prefix_caching.unwrap_or("true".to_string());
|
let prefix_caching = prefix_caching.unwrap_or("true".to_string());
|
||||||
|
Loading…
Reference in New Issue
Block a user