mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Adding a test for FD.
This commit is contained in:
parent
9cca3e0b03
commit
f6697baf31
@ -342,6 +342,7 @@ def launcher(event_loop):
|
||||
max_total_tokens: Optional[int] = None,
|
||||
lora_adapters: Optional[List[str]] = None,
|
||||
cuda_graphs: Optional[List[int]] = None,
|
||||
attention: Optional[str] = None,
|
||||
):
|
||||
port = random.randint(8000, 10_000)
|
||||
master_port = random.randint(10_000, 20_000)
|
||||
@ -401,6 +402,8 @@ def launcher(event_loop):
|
||||
|
||||
if not use_flash_attention:
|
||||
env["USE_FLASH_ATTENTION"] = "false"
|
||||
if attention is not None:
|
||||
env["ATTENTION"] = attention
|
||||
|
||||
with tempfile.TemporaryFile("w+") as tmp:
|
||||
# We'll output stdout/stderr to a temporary file. Using a pipe
|
||||
@ -437,6 +440,7 @@ def launcher(event_loop):
|
||||
max_total_tokens: Optional[int] = None,
|
||||
lora_adapters: Optional[List[str]] = None,
|
||||
cuda_graphs: Optional[List[int]] = None,
|
||||
attention: Optional[str] = None,
|
||||
):
|
||||
port = random.randint(8000, 10_000)
|
||||
|
||||
@ -491,6 +495,8 @@ def launcher(event_loop):
|
||||
}
|
||||
if not use_flash_attention:
|
||||
env["USE_FLASH_ATTENTION"] = "false"
|
||||
if attention is not None:
|
||||
env["ATTENTION"] = attention
|
||||
|
||||
if HF_TOKEN is not None:
|
||||
env["HF_TOKEN"] = HF_TOKEN
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user