mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
More tensor cores.
This commit is contained in:
parent
75c8c54ac9
commit
56c630a425
@@ -156,7 +156,7 @@ def create_decode_state(
|
||||
workspace_buffer,
|
||||
kv_layout="NHD",
|
||||
use_cuda_graph=False,
|
||||
- use_tensor_cores=num_heads // num_kv_heads > 4,
|
||||
+ use_tensor_cores=num_heads // num_kv_heads > 1,
|
||||
)
|
||||
|
||||
|
||||
@@ -182,7 +182,7 @@ def create_decode_state_cuda_graphs(
|
||||
paged_kv_indices_buffer=block_tables,
|
||||
paged_kv_indptr_buffer=block_tables_ptr,
|
||||
paged_kv_last_page_len_buffer=last_page_len,
|
||||
- use_tensor_cores=num_heads // num_kv_heads > 4,
|
||||
+ use_tensor_cores=num_heads // num_kv_heads > 1,
|
||||
)
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user