attention -> paged-attention

This commit is contained in:
Daniël de Kok 2025-02-05 15:39:04 +00:00
parent 8ad383c7cb
commit 96a4d4d083
6 changed files with 126 additions and 126 deletions

View File

@ -978,11 +978,11 @@
"nixpkgs": "nixpkgs_6" "nixpkgs": "nixpkgs_6"
}, },
"locked": { "locked": {
"lastModified": 1738752518, "lastModified": 1738769628,
"narHash": "sha256-+Cm517pJIgUJ2jMwQyR7qZ96u410eHMk3rTarHXkbug=", "narHash": "sha256-hgHf1mscFbH9XtT3dYtFQcxRfict9N+Vi6QSW1c+FjU=",
"owner": "huggingface", "owner": "huggingface",
"repo": "text-generation-inference-nix", "repo": "text-generation-inference-nix",
"rev": "c9b5c8e48b96961125ada3075e21074844740fe1", "rev": "9a5a58219dead9704d83d9d32f105b6b90bd31f2",
"type": "github" "type": "github"
}, },
"original": { "original": {

View File

@ -3,7 +3,6 @@
buildPythonPackage, buildPythonPackage,
poetry-core, poetry-core,
mypy-protobuf, mypy-protobuf,
attention,
awq-inference-engine, awq-inference-engine,
causal-conv1d, causal-conv1d,
compressed-tensors, compressed-tensors,
@ -29,6 +28,7 @@
opentelemetry-instrumentation-grpc, opentelemetry-instrumentation-grpc,
opentelemetry-semantic-conventions, opentelemetry-semantic-conventions,
outlines, outlines,
paged-attention,
peft, peft,
pillow, pillow,
prometheus-client, prometheus-client,
@ -79,7 +79,6 @@ buildPythonPackage {
pythonRemoveDeps = [ "scipy" ]; pythonRemoveDeps = [ "scipy" ];
dependencies = [ dependencies = [
attention
awq-inference-engine awq-inference-engine
eetq eetq
causal-conv1d causal-conv1d
@ -104,6 +103,7 @@ buildPythonPackage {
opentelemetry-instrumentation-grpc opentelemetry-instrumentation-grpc
opentelemetry-semantic-conventions opentelemetry-semantic-conventions
outlines outlines
paged-attention
peft peft
pillow pillow
prometheus-client prometheus-client

View File

@ -1,246 +1,246 @@
[ [
{ {
"repo_id": "kernels-community/attention", "repo_id": "kernels-community/paged-attention",
"sha": "20100e6a97f0fa1465560aa21eecbf4b04d3d93a", "sha": "331b7e63a6b592799c8bc992f681bb1ee2c865a2",
"files": [ "files": [
{ {
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_attention_6yvgebnqctora.abi3.so", "filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "29733cfb726d11a1d278fb0f3679c010cf5210e2"
},
{
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
"blob_id": "1379d7cc10c5fafa877e3ea73be33d3eed57b449" "blob_id": "609570440c63122010e6254ac2f92d4e4e52ec02"
}, },
{ {
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_fao6f4gjjrpl6.abi3.so",
"blob_id": "a4e60f2c567eb63c84430e9b80acaa0aa6974b1e"
},
{
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_attention_4jg2igd54wzge.abi3.so", "filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "a58d380aa758b8e6842e89013229bee3711286ef"
},
{
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py",
"blob_id": "9dee16955e9d988953733fae4e743d92886c92b1" "blob_id": "9e52382b912b4e2d07f84982f762345debdbbfc8"
}, },
{ {
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_eo7ts45r6k64y.abi3.so",
"blob_id": "c20f9501a41daa820dfda27434674d032931b51e"
},
{
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_attention_syg6kbhkhc4xk.abi3.so", "filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "369150e0964eaca52c0c7906addf9f18d8ec7270"
},
{
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
"blob_id": "0bac0403831e313bcf9cbab1a35c2cbe4d5ef08f" "blob_id": "5f01e3f8c4ae3a031f109f78e010014d34347647"
}, },
{ {
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_5odgyxqhwqtv2.abi3.so",
"blob_id": "74f9714690337f49661c641a4f60f6e1e1f56cfa"
},
{
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_attention_hhzgzhvc7zviy.abi3.so", "filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "05529e8bcee239db92984acb3e19926697c64a3f"
},
{
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
"blob_id": "270fd3d0005a3e44dc6625c3ab4948a7fa7892bb" "blob_id": "a3016a6b1cd7ae051012084bbd39d6f2e0913ace"
}, },
{ {
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_uy2moinaww2jc.abi3.so",
"blob_id": "445652acd4719542710cda86a2d08c70a56c8094"
},
{
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_attention_gbi5gm244waic.abi3.so", "filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "cb6cccabe445cbf7bfd797b4645300e5a2a4ec38"
},
{
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py",
"blob_id": "a517876400c08f9800107c61d6ca3f57e0bdc2e6" "blob_id": "e2cd992a80d4b938f243f0e6060e863278aca7f6"
}, },
{ {
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_35dt23tewn2p2.abi3.so",
"blob_id": "1f6414c382a753edb7512927ac5f3e31b196531d"
},
{
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/__init__.py", "filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_attention_ill75rmpj7yds.abi3.so", "filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "bf93abf5555357ad397844421fcfc66ae0743166"
},
{
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_ops.py", "filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
"blob_id": "f49b90de8bda122b2049bf57f5012b60e05364fe" "blob_id": "150412d67365be8ae5668f83d1939148bb576050"
}, },
{ {
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/platforms.py", "filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_fhq57q56w3m5o.abi3.so",
"blob_id": "ee97eee26a4de8d14d7ccdadaf406eed8405de39"
},
{
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_attention_6qe5ft3kiteru.abi3.so", "filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "0bbd1dc682174c9d7fba2ee7426e1183e668ab79"
},
{
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
"blob_id": "f9b2a39308433746718b31f0d9830b27f72f5242" "blob_id": "2bfef111c96308e595eb628bc88ab660a443089c"
}, },
{ {
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_xvepb4loq5mm2.abi3.so",
"blob_id": "1ea51bd49f8ec76bbe306a261021da52fe6a980f"
},
{
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_attention_ftq3cjdxqfw4m.abi3.so", "filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "d7fa42c3682924a46e9c5b4a7e847a6b4415c5c8"
},
{
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
"blob_id": "27b44593d2252bfe5399c8dcd883aa497223f158" "blob_id": "8928daeec47128544cef187bf18f214fc2238019"
}, },
{ {
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_uyfdujhnc2xoe.abi3.so",
"blob_id": "cf8ebe40f27db0fa87c46d7b4066494e65843820"
},
{
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_attention_lkibbjh726iwm.abi3.so", "filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "4a4cccfd49090ac213bbf562a9c4bb2ff2920eb0"
},
{
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py",
"blob_id": "ac89377661ed1c5f2eca40cf199a15209af0c05c" "blob_id": "dff8537df63e1ef37769a6b7ba6b8c58192d7faa"
}, },
{ {
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_pervvqmod6pi4.abi3.so",
"blob_id": "77eb42e3471e9aa84d1f5d9854995c9737ed6bf3"
},
{
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_attention_vbhagz24hyij6.abi3.so", "filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "4d87629674e87a746aaec4ccadb26bb2a72f2d43"
},
{
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
"blob_id": "2f05f1ffd05c49971dfc9b45971efb5a055c7e52" "blob_id": "543c64d1589cb1747d7dc1ac29bd8f2cbeb61ab7"
}, },
{ {
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_24rowhxd5ebcc.abi3.so",
"blob_id": "43ec3529d8eac816c31cc1eaad4cc2baa3cbd3d6"
},
{
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_attention_sfjvhlixssyce.abi3.so", "filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "ee6153972f28bd997e1fc4a7eaaf425fd5adc918"
},
{
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
"blob_id": "530d483cdf8243f6c863ca49c0e87018634e69d0" "blob_id": "1d62b9bb1cfb040d7f68cd108ac9067100b4cf2d"
}, },
{ {
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_5yleoqr3zje4w.abi3.so",
"blob_id": "ffed60cc0a3948bdea6aa7fb4d486d9b943215ec"
},
{
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
}, },
{ {
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/__init__.py", "filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__init__.py",
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca" "blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
}, },
{ {
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_attention_g7oqtcveiuapk.abi3.so", "filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_custom_ops.py",
"blob_id": "fe58b4ce4158bf5ee55371329396ac8e573cfc85"
},
{
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_custom_ops.py",
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2" "blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
}, },
{ {
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_ops.py", "filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py",
"blob_id": "1e504e67dd25c4aa79bcc509316f3f23e6e3e6ef" "blob_id": "ee817d13be64b46e3cb44ad192af4a5f3817bbf7"
}, },
{ {
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/platforms.py", "filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_3rbp7xipfucgo.abi3.so",
"blob_id": "5d5b3ffda2fd6a830d12341bab26dc5ec03f4a86"
},
{
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py",
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b" "blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
} }
] ]

View File

@ -39,7 +39,7 @@ requires = ["hf-kernels>=0.1.2", "setuptools"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
[tool.kernels.dependencies] [tool.kernels.dependencies]
"kernels-community/attention" = ">=0.0.1" "kernels-community/paged-attention" = ">=0.0.2"
"kernels-community/moe" = ">=0.1.1" "kernels-community/moe" = ">=0.1.1"
"kernels-community/quantization" = ">=0.0.3" "kernels-community/quantization" = ">=0.0.3"

View File

@ -16,15 +16,15 @@ _PARTITION_SIZE = 512
if SYSTEM == "cuda": if SYSTEM == "cuda":
try: try:
attention_kernels = load_kernel( paged_attention_kernels = load_kernel(
module="attention", repo_id="kernels-community/attention" module="paged_attention", repo_id="kernels-community/paged-attention"
) )
except Exception as e: except Exception as e:
raise ImportError( raise ImportError(
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}" f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
) )
else: else:
attention_kernels = None paged_attention_kernels = None
def paged_attention( def paged_attention(
@ -129,7 +129,7 @@ def paged_attention(
max_num_partitions == 1 or num_seqs * num_heads > 512 max_num_partitions == 1 or num_seqs * num_heads > 512
) )
if use_v1: if use_v1:
attention_kernels.paged_attention_v1( paged_attention_kernels.paged_attention_v1(
out, out,
query, query,
kv_cache.key, kv_cache.key,
@ -160,7 +160,7 @@ def paged_attention(
) )
max_logits = torch.empty_like(exp_sums) max_logits = torch.empty_like(exp_sums)
attention_kernels.paged_attention_v2( paged_attention_kernels.paged_attention_v2(
out, out,
exp_sums, exp_sums,
max_logits, max_logits,

View File

@ -13,15 +13,15 @@ from text_generation_server.utils.weights import Weights
if SYSTEM == "cuda": if SYSTEM == "cuda":
try: try:
attention_kernels = load_kernel( paged_attention = load_kernel(
module="attention", repo_id="kernels-community/attention" module="paged_attention", repo_id="kernels-community/paged-attention"
) )
except Exception as e: except Exception as e:
raise ImportError( raise ImportError(
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}" f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
) )
else: else:
attention_kernels = None paged_attention = None
@dataclass @dataclass
@ -237,7 +237,7 @@ def paged_reshape_and_cache(
if key_cache.dtype == torch.float8_e4m3fn: if key_cache.dtype == torch.float8_e4m3fn:
kv_cache_dtype = "fp8" kv_cache_dtype = "fp8"
attention_kernels.reshape_and_cache( paged_attention.reshape_and_cache(
key, key,
value, value,
key_cache, key_cache,