Fixing prefix attention.

Nicolas Patry 2024-08-12 16:23:18 +02:00
parent 44a77dcb9e
commit 0c90550e9d
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
2 changed files with 1 addition and 3 deletions

View File

@@ -21,7 +21,6 @@
from contextlib import contextmanager
from typing import List, Optional, Tuple
from loguru import logger
import torch
import torch.distributed

View File

@@ -6,8 +6,7 @@ from typing import Dict, Optional
from text_generation_server.utils.log import log_master
PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", False)
log_master(logger.info, f"Using Attention = {PREFIX_CACHING}")
log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")
_expected = {"paged", "flashdecoding", "flashinfer"}
assert (
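For reference, below is a minimal, self-contained sketch of the configuration pattern shown in the hunk above, for reproducing it outside the repository. It is an illustration under assumptions, not the file itself: log_master is replaced by a plain loguru call because its definition is not part of this diff, and the assert message is an assumed completion of the assert that is truncated at the end of the hunk.

import os

from loguru import logger

# os.getenv returns a string whenever USE_PREFIX_CACHING is set, so any
# non-empty value (even "0") is truthy here; unset falls back to False.
PREFIX_CACHING = os.getenv("USE_PREFIX_CACHING", False)
logger.info(f"Using prefix caching = {PREFIX_CACHING}")

# Prefix caching implies the flashinfer backend by default; otherwise paged attention.
ATTENTION = os.getenv("ATTENTION", "flashinfer" if PREFIX_CACHING else "paged")

_expected = {"paged", "flashdecoding", "flashinfer"}
# Assumed completion of the truncated assert from the hunk above.
assert ATTENTION in _expected, f"Invalid ATTENTION={ATTENTION}, expected one of {_expected}"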