mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00
fix: cleanup ruff lints
This commit is contained in:
parent
247033b45f
commit
541c476492
@ -1,6 +1,4 @@
|
|||||||
import torch
|
import torch
|
||||||
import pytest
|
|
||||||
from text_generation_server.models.globals import ATTENTION, BLOCK_SIZE
|
|
||||||
from text_generation_server.utils.import_utils import SYSTEM
|
from text_generation_server.utils.import_utils import SYSTEM
|
||||||
|
|
||||||
# only include this import when CUDA is available
|
# only include this import when CUDA is available
|
||||||
@ -18,7 +16,7 @@ def kvcache_memory():
|
|||||||
|
|
||||||
current_memory = torch.cuda.memory_allocated(device)
|
current_memory = torch.cuda.memory_allocated(device)
|
||||||
|
|
||||||
kv_cache = [
|
_kv_cache = [
|
||||||
KVCache(
|
KVCache(
|
||||||
num_blocks=num_blocks,
|
num_blocks=num_blocks,
|
||||||
num_heads=num_kv_heads,
|
num_heads=num_kv_heads,
|
||||||
|
Loading…
Reference in New Issue
Block a user