Fixing prefix attention.

commit 0c90550e9d
parent 44a77dcb9e
Author: Nicolas Patry
Date:   2024-08-12 16:23:18 +02:00
GPG Key ID: 64AF4752B2967863

2 changed files with 1 addition and 3 deletions

@@ -21,7 +21,6 @@
 from contextlib import contextmanager
 from typing import List, Optional, Tuple
-from loguru import logger
 import torch
 import torch.distributed

@@ -6,8 +6,7 @@ from typing import Dict, Optional
 from text_generation_server.utils.log import log_master
 PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", False)
-log_master(logger.info, f"Using Attention = {PREFIX_CACHING}")
+log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
 ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")
 _expected = {"paged", "flashdecoding", "flashinfer"}
 assert (
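
For context, a minimal standalone sketch (not part of this commit) of the configuration logic the second hunk touches, using only the names visible in the diff: USE_PREFIX_CACHING gates the default ATTENTION backend, and the assert restricts the backend to the expected set. The assert message below is illustrative, since the diff cuts off after "assert (". Note that os.getenv returns the raw string whenever the variable is set, so PREFIX_CACHING is truthy for any non-empty value, including "0".

import os

# USE_PREFIX_CACHING: unset -> False; any set value is a (truthy) string.
PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", False)
print(f"Using prefix caching = {PREFIX_CACHING}")

# Default to the flashinfer backend when prefix caching is on, paged otherwise;
# an explicit ATTENTION env var overrides the default either way.
ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")
_expected = {"paged", "flashdecoding", "flashinfer"}
assert (
    ATTENTION in _expected
), f"unexpected ATTENTION={ATTENTION!r}, expected one of {_expected}"
print(f"Selected attention backend: {ATTENTION}")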