mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Remove some comments.
This commit is contained in:
parent
37790de5ca
commit
c67bec168e
@@ -76,9 +76,6 @@ impl Allocator for RadixAllocator {
                 &prefill_tokens.as_slice()[..prefill_tokens.len().saturating_sub(1)],
                 &mut blocks,
             );
-            // Even if this allocation fails below, we need to increase the
-            // refcount to ensure that the prefix that was found is not evicted.
-
             node_id
         } else {
             self.cache_blocks.root_id()
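The two comment lines removed in the hunk above explained why the allocator bumps the reference count of the matched prefix node before attempting the rest of the allocation: even if that later allocation fails, the matched prefix must stay pinned so it cannot be evicted in the meantime. A minimal sketch of that pattern, using hypothetical names (PrefixCache, incref, evict_unreferenced) rather than the real RadixAllocator API:

```python
class PrefixCache:
    """Toy stand-in for a radix-style prefix cache (not the real TGI type)."""

    def __init__(self):
        self.refcounts = {}  # node_id -> number of active users of that prefix

    def incref(self, node_id):
        # Pinned nodes are skipped by evict_unreferenced().
        self.refcounts[node_id] = self.refcounts.get(node_id, 0) + 1

    def decref(self, node_id):
        self.refcounts[node_id] -= 1

    def evict_unreferenced(self):
        # Only prefixes nobody is using may be reclaimed.
        return [n for n, c in self.refcounts.items() if c == 0]


def allocate(cache, matched_node_id, try_allocate_rest):
    # Increase the refcount *before* the fallible allocation, so an eviction
    # pass running in between cannot reclaim the prefix we just matched.
    cache.incref(matched_node_id)
    try:
        return try_allocate_rest()
    except MemoryError:
        # On failure, release our pin again; the prefix was protected for the
        # whole window in which it could otherwise have been evicted.
        cache.decref(matched_node_id)
        return None
```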
@@ -398,7 +398,6 @@ def launcher(event_loop):

         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
-            env["USE_PREFIX_CACHING"] = "0"

         with tempfile.TemporaryFile("w+") as tmp:
             # We'll output stdout/stderr to a temporary file. Using a pipe
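For context, this hunk sits inside the test launcher fixture that assembles the environment for the server process: with flash attention disabled it still forces USE_FLASH_ATTENTION to "false", but it no longer pins USE_PREFIX_CACHING to "0", so prefix caching follows its default. A rough, self-contained sketch of that pattern (the environment variable names come from the diff; the command and the rest of the wiring are simplified assumptions):

```python
import os
import subprocess
import tempfile


def launch_server(cmd, use_flash_attention=True):
    env = os.environ.copy()
    if not use_flash_attention:
        env["USE_FLASH_ATTENTION"] = "false"
        # Before this commit the fixture also set env["USE_PREFIX_CACHING"] = "0";
        # that override was removed, so prefix caching keeps its default value.

    # Stdout/stderr go to a temporary file rather than a pipe, so the child
    # process cannot block once a pipe buffer fills up.
    with tempfile.TemporaryFile("w+") as tmp:
        return subprocess.run(cmd, env=env, stdout=tmp, stderr=tmp)
```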
@@ -1,2 +1,2 @@
 install-flashinfer:
-	pip install flashinfer==0.1.4 -i https://flashinfer.ai/whl/cu124/torch2.4
+	pip install flashinfer==0.1.5 -i https://flashinfer.ai/whl/cu124/torch2.4