mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Enable a custom block size via the `BLOCK_SIZE` environment variable (falls back to the built-in default when unset or unparsable).
This commit is contained in:
parent
cf59593454
commit
13caf958eb
@ -77,6 +77,9 @@ impl Infer {
|
|||||||
false
|
false
|
||||||
};
|
};
|
||||||
let block_size = if flashdecoding { 256 } else { 32 };
|
let block_size = if flashdecoding { 256 } else { 32 };
|
||||||
|
let block_size = std::env::var("BLOCK_SIZE")
|
||||||
|
.map(|b| b.parse().unwrap_or(block_size))
|
||||||
|
.unwrap_or(block_size);
|
||||||
let queue = Queue::new(requires_padding, block_size, window_size, speculate);
|
let queue = Queue::new(requires_padding, block_size, window_size, speculate);
|
||||||
let shared = Arc::new(Shared {
|
let shared = Arc::new(Shared {
|
||||||
batching_task: Notify::new(),
|
batching_task: Notify::new(),
|
||||||
|
Loading…
Reference in New Issue
Block a user