mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Adding a test for FD.
This commit is contained in:
parent
9cca3e0b03
commit
f6697baf31
@ -342,6 +342,7 @@ def launcher(event_loop):
|
|||||||
max_total_tokens: Optional[int] = None,
|
max_total_tokens: Optional[int] = None,
|
||||||
lora_adapters: Optional[List[str]] = None,
|
lora_adapters: Optional[List[str]] = None,
|
||||||
cuda_graphs: Optional[List[int]] = None,
|
cuda_graphs: Optional[List[int]] = None,
|
||||||
|
attention: Optional[str] = None,
|
||||||
):
|
):
|
||||||
port = random.randint(8000, 10_000)
|
port = random.randint(8000, 10_000)
|
||||||
master_port = random.randint(10_000, 20_000)
|
master_port = random.randint(10_000, 20_000)
|
||||||
@ -401,6 +402,8 @@ def launcher(event_loop):
|
|||||||
|
|
||||||
if not use_flash_attention:
|
if not use_flash_attention:
|
||||||
env["USE_FLASH_ATTENTION"] = "false"
|
env["USE_FLASH_ATTENTION"] = "false"
|
||||||
|
if attention is not None:
|
||||||
|
env["ATTENTION"] = attention
|
||||||
|
|
||||||
with tempfile.TemporaryFile("w+") as tmp:
|
with tempfile.TemporaryFile("w+") as tmp:
|
||||||
# We'll output stdout/stderr to a temporary file. Using a pipe
|
# We'll output stdout/stderr to a temporary file. Using a pipe
|
||||||
@ -437,6 +440,7 @@ def launcher(event_loop):
|
|||||||
max_total_tokens: Optional[int] = None,
|
max_total_tokens: Optional[int] = None,
|
||||||
lora_adapters: Optional[List[str]] = None,
|
lora_adapters: Optional[List[str]] = None,
|
||||||
cuda_graphs: Optional[List[int]] = None,
|
cuda_graphs: Optional[List[int]] = None,
|
||||||
|
attention: Optional[str] = None,
|
||||||
):
|
):
|
||||||
port = random.randint(8000, 10_000)
|
port = random.randint(8000, 10_000)
|
||||||
|
|
||||||
@ -491,6 +495,8 @@ def launcher(event_loop):
|
|||||||
}
|
}
|
||||||
if not use_flash_attention:
|
if not use_flash_attention:
|
||||||
env["USE_FLASH_ATTENTION"] = "false"
|
env["USE_FLASH_ATTENTION"] = "false"
|
||||||
|
if attention is not None:
|
||||||
|
env["ATTENTION"] = attention
|
||||||
|
|
||||||
if HF_TOKEN is not None:
|
if HF_TOKEN is not None:
|
||||||
env["HF_TOKEN"] = HF_TOKEN
|
env["HF_TOKEN"] = HF_TOKEN
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user