diff --git a/backends/v3/src/backend.rs b/backends/v3/src/backend.rs index 49e2bc8f..d82355de 100644 --- a/backends/v3/src/backend.rs +++ b/backends/v3/src/backend.rs @@ -35,9 +35,16 @@ impl BackendV3 { window_size: Option, speculate: u32, ) -> Self { + let flashdecoding = if let Ok(flashdecoding) = std::env::var("FLASH_DECODING") { + matches!(flashdecoding.to_lowercase().as_str(), "1" | "true") + } else { + false + }; + let block_size = if flashdecoding { 256 } else { 16 }; + let queue = Queue::new( requires_padding, - 16, + block_size, window_size, speculate, max_batch_total_tokens,