mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
attention -> paged-attention
This commit is contained in:
parent
8ad383c7cb
commit
96a4d4d083
@ -978,11 +978,11 @@
|
|||||||
"nixpkgs": "nixpkgs_6"
|
"nixpkgs": "nixpkgs_6"
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1738752518,
|
"lastModified": 1738769628,
|
||||||
"narHash": "sha256-+Cm517pJIgUJ2jMwQyR7qZ96u410eHMk3rTarHXkbug=",
|
"narHash": "sha256-hgHf1mscFbH9XtT3dYtFQcxRfict9N+Vi6QSW1c+FjU=",
|
||||||
"owner": "huggingface",
|
"owner": "huggingface",
|
||||||
"repo": "text-generation-inference-nix",
|
"repo": "text-generation-inference-nix",
|
||||||
"rev": "c9b5c8e48b96961125ada3075e21074844740fe1",
|
"rev": "9a5a58219dead9704d83d9d32f105b6b90bd31f2",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
buildPythonPackage,
|
buildPythonPackage,
|
||||||
poetry-core,
|
poetry-core,
|
||||||
mypy-protobuf,
|
mypy-protobuf,
|
||||||
attention,
|
|
||||||
awq-inference-engine,
|
awq-inference-engine,
|
||||||
causal-conv1d,
|
causal-conv1d,
|
||||||
compressed-tensors,
|
compressed-tensors,
|
||||||
@ -29,6 +28,7 @@
|
|||||||
opentelemetry-instrumentation-grpc,
|
opentelemetry-instrumentation-grpc,
|
||||||
opentelemetry-semantic-conventions,
|
opentelemetry-semantic-conventions,
|
||||||
outlines,
|
outlines,
|
||||||
|
paged-attention,
|
||||||
peft,
|
peft,
|
||||||
pillow,
|
pillow,
|
||||||
prometheus-client,
|
prometheus-client,
|
||||||
@ -79,7 +79,6 @@ buildPythonPackage {
|
|||||||
pythonRemoveDeps = [ "scipy" ];
|
pythonRemoveDeps = [ "scipy" ];
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
attention
|
|
||||||
awq-inference-engine
|
awq-inference-engine
|
||||||
eetq
|
eetq
|
||||||
causal-conv1d
|
causal-conv1d
|
||||||
@ -104,6 +103,7 @@ buildPythonPackage {
|
|||||||
opentelemetry-instrumentation-grpc
|
opentelemetry-instrumentation-grpc
|
||||||
opentelemetry-semantic-conventions
|
opentelemetry-semantic-conventions
|
||||||
outlines
|
outlines
|
||||||
|
paged-attention
|
||||||
peft
|
peft
|
||||||
pillow
|
pillow
|
||||||
prometheus-client
|
prometheus-client
|
||||||
|
@ -1,246 +1,246 @@
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"repo_id": "kernels-community/attention",
|
"repo_id": "kernels-community/paged-attention",
|
||||||
"sha": "20100e6a97f0fa1465560aa21eecbf4b04d3d93a",
|
"sha": "331b7e63a6b592799c8bc992f681bb1ee2c865a2",
|
||||||
"files": [
|
"files": [
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_attention_6yvgebnqctora.abi3.so",
|
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "29733cfb726d11a1d278fb0f3679c010cf5210e2"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "1379d7cc10c5fafa877e3ea73be33d3eed57b449"
|
"blob_id": "609570440c63122010e6254ac2f92d4e4e52ec02"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu118-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_fao6f4gjjrpl6.abi3.so",
|
||||||
|
"blob_id": "a4e60f2c567eb63c84430e9b80acaa0aa6974b1e"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_attention_4jg2igd54wzge.abi3.so",
|
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "a58d380aa758b8e6842e89013229bee3711286ef"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "9dee16955e9d988953733fae4e743d92886c92b1"
|
"blob_id": "9e52382b912b4e2d07f84982f762345debdbbfc8"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu121-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/_paged_attention_eo7ts45r6k64y.abi3.so",
|
||||||
|
"blob_id": "c20f9501a41daa820dfda27434674d032931b51e"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx11-cu121-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_attention_syg6kbhkhc4xk.abi3.so",
|
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "369150e0964eaca52c0c7906addf9f18d8ec7270"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "0bac0403831e313bcf9cbab1a35c2cbe4d5ef08f"
|
"blob_id": "5f01e3f8c4ae3a031f109f78e010014d34347647"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx11-cu124-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_5odgyxqhwqtv2.abi3.so",
|
||||||
|
"blob_id": "74f9714690337f49661c641a4f60f6e1e1f56cfa"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_attention_hhzgzhvc7zviy.abi3.so",
|
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "05529e8bcee239db92984acb3e19926697c64a3f"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "270fd3d0005a3e44dc6625c3ab4948a7fa7892bb"
|
"blob_id": "a3016a6b1cd7ae051012084bbd39d6f2e0913ace"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu118-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_uy2moinaww2jc.abi3.so",
|
||||||
|
"blob_id": "445652acd4719542710cda86a2d08c70a56c8094"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_attention_gbi5gm244waic.abi3.so",
|
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "cb6cccabe445cbf7bfd797b4645300e5a2a4ec38"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "a517876400c08f9800107c61d6ca3f57e0bdc2e6"
|
"blob_id": "e2cd992a80d4b938f243f0e6060e863278aca7f6"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu121-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/_paged_attention_35dt23tewn2p2.abi3.so",
|
||||||
|
"blob_id": "1f6414c382a753edb7512927ac5f3e31b196531d"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx98-cu121-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_attention_ill75rmpj7yds.abi3.so",
|
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "bf93abf5555357ad397844421fcfc66ae0743166"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "f49b90de8bda122b2049bf57f5012b60e05364fe"
|
"blob_id": "150412d67365be8ae5668f83d1939148bb576050"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch25-cxx98-cu124-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_fhq57q56w3m5o.abi3.so",
|
||||||
|
"blob_id": "ee97eee26a4de8d14d7ccdadaf406eed8405de39"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch25-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_attention_6qe5ft3kiteru.abi3.so",
|
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "0bbd1dc682174c9d7fba2ee7426e1183e668ab79"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "f9b2a39308433746718b31f0d9830b27f72f5242"
|
"blob_id": "2bfef111c96308e595eb628bc88ab660a443089c"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu118-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/_paged_attention_xvepb4loq5mm2.abi3.so",
|
||||||
|
"blob_id": "1ea51bd49f8ec76bbe306a261021da52fe6a980f"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx11-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_attention_ftq3cjdxqfw4m.abi3.so",
|
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "d7fa42c3682924a46e9c5b4a7e847a6b4415c5c8"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "27b44593d2252bfe5399c8dcd883aa497223f158"
|
"blob_id": "8928daeec47128544cef187bf18f214fc2238019"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu124-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/_paged_attention_uyfdujhnc2xoe.abi3.so",
|
||||||
|
"blob_id": "cf8ebe40f27db0fa87c46d7b4066494e65843820"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx11-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_attention_lkibbjh726iwm.abi3.so",
|
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "4a4cccfd49090ac213bbf562a9c4bb2ff2920eb0"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "ac89377661ed1c5f2eca40cf199a15209af0c05c"
|
"blob_id": "dff8537df63e1ef37769a6b7ba6b8c58192d7faa"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx11-cu126-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/_paged_attention_pervvqmod6pi4.abi3.so",
|
||||||
|
"blob_id": "77eb42e3471e9aa84d1f5d9854995c9737ed6bf3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx11-cu126-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_attention_vbhagz24hyij6.abi3.so",
|
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "4d87629674e87a746aaec4ccadb26bb2a72f2d43"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "2f05f1ffd05c49971dfc9b45971efb5a055c7e52"
|
"blob_id": "543c64d1589cb1747d7dc1ac29bd8f2cbeb61ab7"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu118-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/_paged_attention_24rowhxd5ebcc.abi3.so",
|
||||||
|
"blob_id": "43ec3529d8eac816c31cc1eaad4cc2baa3cbd3d6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx98-cu118-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_attention_sfjvhlixssyce.abi3.so",
|
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "ee6153972f28bd997e1fc4a7eaaf425fd5adc918"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "530d483cdf8243f6c863ca49c0e87018634e69d0"
|
"blob_id": "1d62b9bb1cfb040d7f68cd108ac9067100b4cf2d"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu124-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/_paged_attention_5yleoqr3zje4w.abi3.so",
|
||||||
|
"blob_id": "ffed60cc0a3948bdea6aa7fb4d486d9b943215ec"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx98-cu124-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/__init__.py",
|
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/__init__.py",
|
||||||
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
"blob_id": "9de56043369487facc1f163df6bd319c9806e5ca"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_attention_g7oqtcveiuapk.abi3.so",
|
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_custom_ops.py",
|
||||||
"blob_id": "fe58b4ce4158bf5ee55371329396ac8e573cfc85"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_custom_ops.py",
|
|
||||||
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
"blob_id": "a0c0b8db085468dee5100c98d14106a9ee917bf2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/_ops.py",
|
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_ops.py",
|
||||||
"blob_id": "1e504e67dd25c4aa79bcc509316f3f23e6e3e6ef"
|
"blob_id": "ee817d13be64b46e3cb44ad192af4a5f3817bbf7"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"filename": "build/torch26-cxx98-cu126-x86_64-linux/attention/platforms.py",
|
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/_paged_attention_3rbp7xipfucgo.abi3.so",
|
||||||
|
"blob_id": "5d5b3ffda2fd6a830d12341bab26dc5ec03f4a86"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"filename": "build/torch26-cxx98-cu126-x86_64-linux/paged_attention/platforms.py",
|
||||||
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
"blob_id": "aa06132e74cd7fb634044a76e528979b02a3559b"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -39,7 +39,7 @@ requires = ["hf-kernels>=0.1.2", "setuptools"]
|
|||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
[tool.kernels.dependencies]
|
[tool.kernels.dependencies]
|
||||||
"kernels-community/attention" = ">=0.0.1"
|
"kernels-community/paged-attention" = ">=0.0.2"
|
||||||
"kernels-community/moe" = ">=0.1.1"
|
"kernels-community/moe" = ">=0.1.1"
|
||||||
"kernels-community/quantization" = ">=0.0.3"
|
"kernels-community/quantization" = ">=0.0.3"
|
||||||
|
|
||||||
|
@ -16,15 +16,15 @@ _PARTITION_SIZE = 512
|
|||||||
|
|
||||||
if SYSTEM == "cuda":
|
if SYSTEM == "cuda":
|
||||||
try:
|
try:
|
||||||
attention_kernels = load_kernel(
|
paged_attention_kernels = load_kernel(
|
||||||
module="attention", repo_id="kernels-community/attention"
|
module="paged_attention", repo_id="kernels-community/paged-attention"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
attention_kernels = None
|
paged_attention_kernels = None
|
||||||
|
|
||||||
|
|
||||||
def paged_attention(
|
def paged_attention(
|
||||||
@ -129,7 +129,7 @@ def paged_attention(
|
|||||||
max_num_partitions == 1 or num_seqs * num_heads > 512
|
max_num_partitions == 1 or num_seqs * num_heads > 512
|
||||||
)
|
)
|
||||||
if use_v1:
|
if use_v1:
|
||||||
attention_kernels.paged_attention_v1(
|
paged_attention_kernels.paged_attention_v1(
|
||||||
out,
|
out,
|
||||||
query,
|
query,
|
||||||
kv_cache.key,
|
kv_cache.key,
|
||||||
@ -160,7 +160,7 @@ def paged_attention(
|
|||||||
)
|
)
|
||||||
max_logits = torch.empty_like(exp_sums)
|
max_logits = torch.empty_like(exp_sums)
|
||||||
|
|
||||||
attention_kernels.paged_attention_v2(
|
paged_attention_kernels.paged_attention_v2(
|
||||||
out,
|
out,
|
||||||
exp_sums,
|
exp_sums,
|
||||||
max_logits,
|
max_logits,
|
||||||
|
@ -13,15 +13,15 @@ from text_generation_server.utils.weights import Weights
|
|||||||
|
|
||||||
if SYSTEM == "cuda":
|
if SYSTEM == "cuda":
|
||||||
try:
|
try:
|
||||||
attention_kernels = load_kernel(
|
paged_attention = load_kernel(
|
||||||
module="attention", repo_id="kernels-community/attention"
|
module="paged_attention", repo_id="kernels-community/paged-attention"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
f"Could not import attention kernels. Make sure your installation is correct. Complete error: {e}"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
attention_kernels = None
|
paged_attention = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@ -237,7 +237,7 @@ def paged_reshape_and_cache(
|
|||||||
if key_cache.dtype == torch.float8_e4m3fn:
|
if key_cache.dtype == torch.float8_e4m3fn:
|
||||||
kv_cache_dtype = "fp8"
|
kv_cache_dtype = "fp8"
|
||||||
|
|
||||||
attention_kernels.reshape_and_cache(
|
paged_attention.reshape_and_cache(
|
||||||
key,
|
key,
|
||||||
value,
|
value,
|
||||||
key_cache,
|
key_cache,
|
||||||
|
Loading…
Reference in New Issue
Block a user