mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Enabling custom block size schedule.
This commit is contained in:
parent
cf59593454
commit
13caf958eb
@ -77,6 +77,9 @@ impl Infer {
|
||||
false
|
||||
};
|
||||
let block_size = if flashdecoding { 256 } else { 32 };
|
||||
let block_size = std::env::var("BLOCK_SIZE")
|
||||
.map(|b| b.parse().unwrap_or(block_size))
|
||||
.unwrap_or(block_size);
|
||||
let queue = Queue::new(requires_padding, block_size, window_size, speculate);
|
||||
let shared = Arc::new(Shared {
|
||||
batching_task: Notify::new(),
|
||||
|
Loading…
Reference in New Issue
Block a user