mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
attention -> paged-attention
This commit is contained in:
parent
8ad383c7cb
commit
96a4d4d083
@ -978,11 +978,11 @@
|
||||
"nixpkgs": "nixpkgs_6"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1738752518,
|
||||
"narHash": "sha256-+Cm517pJIgUJ2jMwQyR7qZ96u410eHMk3rTarHXkbug=",
|
||||
"lastModified": 1738769628,
|
||||
"narHash": "sha256-hgHf1mscFbH9XtT3dYtFQcxRfict9N+Vi6QSW1c+FjU=",
|
||||
"owner": "huggingface",
|
||||
"repo": "text-generation-inference-nix",
|
||||
"rev": "c9b5c8e48b96961125ada3075e21074844740fe1",
|
||||
"rev": "9a5a58219dead9704d83d9d32f105b6b90bd31f2",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
|
@ -3,7 +3,6 @@
|
||||
buildPythonPackage,
|
||||
poetry-core,
|
||||
mypy-protobuf,
|
||||
attention,
|
||||
awq-inference-engine,
|
||||
causal-conv1d,
|
||||
compressed-tensors,
|
||||
@ -29,6 +28,7 @@
|
||||
opentelemetry-instrumentation-grpc,
|
||||
opentelemetry-semantic-conventions,
|
||||
outlines,
|
||||
paged-attention,
|
||||
peft,
|
||||
pillow,
|
||||
prometheus-client,
|
||||
@ -79,7 +79,6 @@ buildPythonPackage {
|
||||
pythonRemoveDeps = [ "scipy" ];
|
||||
|
||||
dependencies = [
|
||||
attention
|
||||
awq-inference-engine
|
||||
eetq
|
||||
causal-conv1d
|
||||
@ -104,6 +103,7 @@ buildPythonPackage {
|
||||
opentelemetry-instrumentation-grpc
|
||||
opentelemetry-semantic-conventions
|
||||
outlines
|
||||
paged-attention
|
||||
peft
|
||||
pillow
|
||||
prometheus-client
|
||||
|
@ -1,246 +1,246 @@
|
||||
[
|
||||
{
|
||||
"repo_id": "kernels-community/attention",
|
||||
"sha": "20100e6a97f0fa1465560aa21eecbf4b04d3d93a",
|
||||
"repo_id": "kernels-community/paged-attention",
|
||||
"sha": "331b7e63a6b592799c8bc992f681bb1ee2c865a2",
|
||||
"files": [
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_attention_6yvgebnqctora.abi3.so",
|
||||
"blob_id": "29733cfb726d11a1d278fb0f3679c010cf5210e2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "1379d7cc10c5fafa877e3ea73be33d3eed57b449"
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "609570440c63122010e6254ac2f92d4e4e52ec02"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_fao6f4gjjrpl6.abi3.so",
|
||||
"blob_id": "a4e60f2c567eb63c84430e9b80acaa0aa6974b1e"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_attention_4jg2igd54wzge.abi3.so",
|
||||
"blob_id": "a58d380aa758b8e6842e89013229bee3711286ef"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "9dee16955e9d988953733fae4e743d92886c92b1"
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "9e52382b912b4e2d07f84982f762345debdbbfc8"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_eo7ts45r6k64y.abi3.so",
|
||||
"blob_id": "c20f9501a41daa820dfda27434674d032931b51e"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_attention_syg6kbhkhc4xk.abi3.so",
|
||||
"blob_id": "369150e0964eaca52c0c7906addf9f18d8ec7270"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "0bac0403831e313bcf9cbab1a35c2cbe4d5ef08f"
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "5f01e3f8c4ae3a031f109f78e010014d34347647"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_5odgyxqhwqtv2.abi3.so",
|
||||
"blob_id": "74f9714690337f49661c641a4f60f6e1e1f56cfa"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_attention_hhzgzhvc7zviy.abi3.so",
|
||||
"blob_id": "05529e8bcee239db92984acb3e19926697c64a3f"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "270fd3d0005a3e44dc6625c3ab4948a7fa7892bb"
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "a3016a6b1cd7ae051012084bbd39d6f2e0913ace"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_uy2moinaww2jc.abi3.so",
|
||||
"blob_id": "445652acd4719542710cda86a2d08c70a56c8094"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_attention_gbi5gm244waic.abi3.so",
|
||||
"blob_id": "cb6cccabe445cbf7bfd797b4645300e5a2a4ec38"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "a517876400c08f9800107c61d6ca3f57e0bdc2e6"
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "e2cd992a80d4b938f243f0e6060e863278aca7f6"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_35dt23tewn2p2.abi3.so",
|
||||
"blob_id": "1f6414c382a753edb7512927ac5f3e31b196531d"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_attention_ill75rmpj7yds.abi3.so",
|
||||
"blob_id": "bf93abf5555357ad397844421fcfc66ae0743166"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "f49b90de8bda122b2049bf57f5012b60e05364fe"
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "150412d67365be8ae5668f83d1939148bb576050"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_fhq57q56w3m5o.abi3.so",
|
||||
"blob_id": "ee97eee26a4de8d14d7ccdadaf406eed8405de39"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_attention_6qe5ft3kiteru.abi3.so",
|
||||
"blob_id": "0bbd1dc682174c9d7fba2ee7426e1183e668ab79"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "f9b2a39308433746718b31f0d9830b27f72f5242"
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "2bfef111c96308e595eb628bc88ab660a443089c"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_xvepb4loq5mm2.abi3.so",
|
||||
"blob_id": "1ea51bd49f8ec76bbe306a261021da52fe6a980f"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_attention_ftq3cjdxqfw4m.abi3.so",
|
||||
"blob_id": "d7fa42c3682924a46e9c5b4a7e847a6b4415c5c8"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "27b44593d2252bfe5399c8dcd883aa497223f158"
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "8928daeec47128544cef187bf18f214fc2238019"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_uyfdujhnc2xoe.abi3.so",
|
||||
"blob_id": "cf8ebe40f27db0fa87c46d7b4066494e65843820"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_attention_lkibbjh726iwm.abi3.so",
|
||||
"blob_id": "4a4cccfd49090ac213bbf562a9c4bb2ff2920eb0"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "ac89377661ed1c5f2eca40cf199a15209af0c05c"
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "dff8537df63e1ef37769a6b7ba6b8c58192d7faa"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_pervvqmod6pi4.abi3.so",
|
||||
"blob_id": "77eb42e3471e9aa84d1f5d9854995c9737ed6bf3"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_attention_vbhagz24hyij6.abi3.so",
|
||||
"blob_id": "4d87629674e87a746aaec4ccadb26bb2a72f2d43"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "2f05f1ffd05c49971dfc9b45971efb5a055c7e52"
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "543c64d1589cb1747d7dc1ac29bd8f2cbeb61ab7"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_24rowhxd5ebcc.abi3.so",
|
||||
"blob_id": "43ec3529d8eac816c31cc1eaad4cc2baa3cbd3d6"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_attention_sfjvhlixssyce.abi3.so",
|
||||
"blob_id": "ee6153972f28bd997e1fc4a7eaaf425fd5adc918"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "530d483cdf8243f6c863ca49c0e87018634e69d0"
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "1d62b9bb1cfb040d7f68cd108ac9067100b4cf2d"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_5yleoqr3zje4w.abi3.so",
|
||||
"blob_id": "ffed60cc0a3948bdea6aa7fb4d486d9b943215ec"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/__init__.py",
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__init__.py",
|
||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_attention_g7oqtcveiuapk.abi3.so",
|
||||
"blob_id": "fe58b4ce4158bf5ee55371329396ac8e573cfc85"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_custom_ops.py",
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_custom_ops.py",
|
||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_ops.py",
|
||||
"blob_id": "1e504e67dd25c4aa79bcc509316f3f23e6e3e6ef"
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py",
|
||||
"blob_id": "ee817d13be64b46e3cb44ad192af4a5f3817bbf7"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/platforms.py",
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_3rbp7xipfucgo.abi3.so",
|
||||
"blob_id": "5d5b3ffda2fd6a830d12341bab26dc5ec03f4a86"
|
||||
},
|
||||
{
|
||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py",
|
||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||
}
|
||||
]
|
||||
|
@ -39,7 +39,7 @@ requires = ["hf-kernels>=0.1.2", "setuptools"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.kernels.dependencies]
|
||||
"kernels-community/attention" = ">=0.0.1"
|
||||
"kernels-community/paged-attention" = ">=0.0.2"
|
||||
"kernels-community/moe" = ">=0.1.1"
|
||||
"kernels-community/quantization" = ">=0.0.3"
|
||||
|
||||
|
@ -16,15 +16,15 @@ _PARTITION_SIZE = 512
|
||||
|
||||
if SYSTEM == "cuda":
|
||||
try:
|
||||
attention_kernels = load_kernel(
|
||||
module="attention", repo_id="kernels-community/attention"
|
||||
paged_attention_kernels = load_kernel(
|
||||
module="paged_attention", repo_id="kernels-community/paged-attention"
|
||||
)
|
||||
except Exception as e:
|
||||
raise ImportError(
|
||||
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
||||
)
|
||||
else:
|
||||
attention_kernels = None
|
||||
paged_attention_kernels = None
|
||||
|
||||
|
||||
def paged_attention(
|
||||
@ -129,7 +129,7 @@ def paged_attention(
|
||||
max_num_partitions == 1 or num_seqs * num_heads > 512
|
||||
)
|
||||
if use_v1:
|
||||
attention_kernels.paged_attention_v1(
|
||||
paged_attention_kernels.paged_attention_v1(
|
||||
out,
|
||||
query,
|
||||
kv_cache.key,
|
||||
@ -160,7 +160,7 @@ def paged_attention(
|
||||
)
|
||||
max_logits = torch.empty_like(exp_sums)
|
||||
|
||||
attention_kernels.paged_attention_v2(
|
||||
paged_attention_kernels.paged_attention_v2(
|
||||
out,
|
||||
exp_sums,
|
||||
max_logits,
|
||||
|
@ -13,15 +13,15 @@ from text_generation_server.utils.weights import Weights
|
||||
|
||||
if SYSTEM == "cuda":
|
||||
try:
|
||||
attention_kernels = load_kernel(
|
||||
module="attention", repo_id="kernels-community/attention"
|
||||
paged_attention = load_kernel(
|
||||
module="paged_attention", repo_id="kernels-community/paged-attention"
|
||||
)
|
||||
except Exception as e:
|
||||
raise ImportError(
|
||||
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
||||
)
|
||||
else:
|
||||
attention_kernels = None
|
||||
paged_attention = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -237,7 +237,7 @@ def paged_reshape_and_cache(
|
||||
if key_cache.dtype == torch.float8_e4m3fn:
|
||||
kv_cache_dtype = "fp8"
|
||||
|
||||
attention_kernels.reshape_and_cache(
|
||||
paged_attention.reshape_and_cache(
|
||||
key,
|
||||
value,
|
||||
key_cache,
|
||||
|
Loading…
Reference in New Issue
Block a user