mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Move disabling prefix caching into the block of exceptions
This commit is contained in:
parent
8c0f9312f3
commit
a29636ee0a
@ -103,6 +103,10 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
|
|||||||
);
|
);
|
||||||
attention = Some(fallback_attention.to_string());
|
attention = Some(fallback_attention.to_string());
|
||||||
}
|
}
|
||||||
|
if fallback_attention == "paged" && prefix_caching.is_none() {
|
||||||
|
tracing::info!("Disabling prefix caching because it is not supported with 'paged' attention");
|
||||||
|
prefix_caching = Some("0".to_string());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Some("t5") => {}
|
Some("t5") => {}
|
||||||
_ => {}
|
_ => {}
|
||||||
@ -119,16 +123,9 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let attention = attention.unwrap_or("flashinfer".to_string());
|
let attention = attention.unwrap_or("flashinfer".to_string());
|
||||||
let prefix_caching = if attention == "paged"
|
let prefix_caching = prefix_caching.unwrap_or("true".to_string());
|
||||||
&& prefix_caching.is_none()
|
|
||||||
&& compute_capability.is_some()
|
|
||||||
{
|
|
||||||
tracing::info!("Disabling prefix caching because it is not supported with 'flashinfer'");
|
|
||||||
"false".to_string()
|
|
||||||
} else {
|
|
||||||
prefix_caching.unwrap_or("true".to_string())
|
|
||||||
};
|
|
||||||
|
|
||||||
(prefix_caching, attention)
|
(prefix_caching, attention)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user