Basic flashinfer 0.2 support
This change does not use any of the new features yet, but makes some small compatibility changes.
parent 23bc38b10d
commit 41c9973d07
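The recurring edit in the Python hunks below is a `window_left` shim: flashinfer uses the integer sentinel -1 to mean "no sliding window", and judging by this change the 0.2 planner calls no longer accept Python's `None` in that spot. A minimal standalone sketch of the normalization (the helper name is mine, not TGI's):

from typing import Optional


def normalize_window_left(window_left: Optional[int]) -> int:
    """Map None (no sliding-window attention) to flashinfer's -1 sentinel."""
    return -1 if window_left is None else window_left


assert normalize_window_left(None) == -1  # full attention, no window
assert normalize_window_left(256) == 256  # a real window size passes through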
flake.lock
@@ -978,15 +978,16 @@
         "nixpkgs": "nixpkgs_6"
       },
       "locked": {
-        "lastModified": 1732218602,
-        "narHash": "sha256-BElslL34KjOJCFMPkNtilOz6S/7iY7Vd72FNbRRWKDY=",
+        "lastModified": 1734861790,
+        "narHash": "sha256-3afC0dDIkjOICziL4voDchZIkP14g8KM0xilGjt0cio=",
         "owner": "huggingface",
         "repo": "text-generation-inference-nix",
-        "rev": "f79638ac4e420e661321261744e745a3a747e182",
+        "rev": "29728b3bb43517114aa3025a270bcda4fe78de9f",
         "type": "github"
       },
       "original": {
         "owner": "huggingface",
+        "ref": "flashinfer-v0.2",
         "repo": "text-generation-inference-nix",
         "type": "github"
       }
flake.nix
@@ -5,7 +5,7 @@
       inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
     };
     nix-filter.url = "github:numtide/nix-filter";
-    tgi-nix.url = "github:huggingface/text-generation-inference-nix";
+    tgi-nix.url = "github:huggingface/text-generation-inference-nix/flashinfer-v0.2";
     nixpkgs.follows = "tgi-nix/nixpkgs";
     flake-utils.url = "github:numtide/flake-utils";
     rust-overlay = {
server/Makefile-flashinfer
@@ -1,2 +1,2 @@
 install-flashinfer:
-	pip install flashinfer==0.1.6 -i https://flashinfer.ai/whl/cu124/torch2.4
+	pip install flashinfer==0.2.0 -i https://flashinfer.ai/whl/cu124/torch2.4
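Not part of the commit, but a quick sanity check that the bumped pin actually resolved after rerunning the install target (assuming flashinfer's top-level `__version__` attribute, present on the 0.x releases):

import flashinfer

# Expect a 0.2.x wheel after reinstalling with the new pin.
assert flashinfer.__version__.startswith("0.2"), flashinfer.__version__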
server/text_generation_server/layers/attention/flashinfer.py
@@ -93,7 +93,7 @@ def use_prefill_with_paged_kv_state(
             head_dim=head_size,
             q_data_type=dtype,
             page_size=page_size,
-            window_left=window_left,
+            window_left=-1 if window_left is None else window_left,
         )
         yield
     finally:
@@ -139,7 +139,7 @@ def use_prefill_state(
             num_kv_heads=num_kv_heads,
             head_dim=head_size,
             q_data_type=dtype,
-            window_left=window_left,
+            window_left=-1 if window_left is None else window_left,
         )
         yield
     finally:
@@ -243,7 +243,7 @@ def use_decode_state(
             page_size=page_size,
             data_type=kv_cache_dtype,
             q_data_type=dtype,
-            window_left=window_left,
+            window_left=-1 if window_left is None else window_left,
         )
         yield
     finally: