From 719d7b4d54d294a71d9dc603a68881790ee5a01c Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 19 Aug 2024 16:56:06 +0200 Subject: [PATCH] Disabling flashinfer/prefix caching on odd head_dim --- flake.lock | 963 ------------------------------------------- launcher/src/main.rs | 32 ++ 2 files changed, 32 insertions(+), 963 deletions(-) delete mode 100644 flake.lock diff --git a/flake.lock b/flake.lock deleted file mode 100644 index 69f9ef13..00000000 --- a/flake.lock +++ /dev/null @@ -1,963 +0,0 @@ -{ - "nodes": { - "cachix": { - "inputs": { - "devenv": [ - "crate2nix" - ], - "flake-compat": [ - "crate2nix" - ], - "nixpkgs": "nixpkgs", - "pre-commit-hooks": [ - "crate2nix" - ] - }, - "locked": { - "lastModified": 1709700175, - "narHash": "sha256-A0/6ZjLmT9qdYzKHmevnEIC7G+GiZ4UCr8v0poRPzds=", - "owner": "cachix", - "repo": "cachix", - "rev": "be97b37989f11b724197b5f4c7ffd78f12c8c4bf", - "type": "github" - }, - "original": { - "owner": "cachix", - "ref": "latest", - "repo": "cachix", - "type": "github" - } - }, - "cachix_2": { - "inputs": { - "devenv": [ - "crate2nix", - "crate2nix_stable" - ], - "flake-compat": [ - "crate2nix", - "crate2nix_stable" - ], - "nixpkgs": "nixpkgs_2", - "pre-commit-hooks": [ - "crate2nix", - "crate2nix_stable" - ] - }, - "locked": { - "lastModified": 1716549461, - "narHash": "sha256-lHy5kgx6J8uD+16SO47dPrbob98sh+W1tf4ceSqPVK4=", - "owner": "cachix", - "repo": "cachix", - "rev": "e2bb269fb8c0828d5d4d2d7b8d09ea85abcacbd4", - "type": "github" - }, - "original": { - "owner": "cachix", - "ref": "latest", - "repo": "cachix", - "type": "github" - } - }, - "cachix_3": { - "inputs": { - "devenv": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable" - ], - "flake-compat": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable" - ], - "nixpkgs": "nixpkgs_3", - "pre-commit-hooks": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable" - ] - }, - "locked": { - "lastModified": 1716549461, - "narHash": "sha256-lHy5kgx6J8uD+16SO47dPrbob98sh+W1tf4ceSqPVK4=", - "owner": "cachix", - "repo": "cachix", - "rev": "e2bb269fb8c0828d5d4d2d7b8d09ea85abcacbd4", - "type": "github" - }, - "original": { - "owner": "cachix", - "ref": "latest", - "repo": "cachix", - "type": "github" - } - }, - "crate2nix": { - "inputs": { - "cachix": "cachix", - "crate2nix_stable": "crate2nix_stable", - "devshell": "devshell_3", - "flake-compat": "flake-compat_3", - "flake-parts": "flake-parts_3", - "nix-test-runner": "nix-test-runner_3", - "nixpkgs": [ - "tgi-nix", - "nixpkgs" - ], - "pre-commit-hooks": "pre-commit-hooks_3" - }, - "locked": { - "lastModified": 1723311214, - "narHash": "sha256-xdGZQBEa1AC2us/sY3igS/CucWY6jErXsAvCFRhB2LI=", - "owner": "nix-community", - "repo": "crate2nix", - "rev": "236f6addfd452a48be805819e3216af79e988fd5", - "type": "github" - }, - "original": { - "owner": "nix-community", - "repo": "crate2nix", - "type": "github" - } - }, - "crate2nix_stable": { - "inputs": { - "cachix": "cachix_2", - "crate2nix_stable": "crate2nix_stable_2", - "devshell": "devshell_2", - "flake-compat": "flake-compat_2", - "flake-parts": "flake-parts_2", - "nix-test-runner": "nix-test-runner_2", - "nixpkgs": "nixpkgs_5", - "pre-commit-hooks": "pre-commit-hooks_2" - }, - "locked": { - "lastModified": 1719760004, - "narHash": "sha256-esWhRnt7FhiYq0CcIxw9pvH+ybOQmWBfHYMtleaMhBE=", - "owner": "nix-community", - "repo": "crate2nix", - "rev": "1dee214bb20855fa3e1e7bb98d28922ddaff8c57", - "type": "github" - }, - "original": { - "owner": "nix-community", - "ref": "0.14.1", - "repo": "crate2nix", - "type": "github" - } - }, - "crate2nix_stable_2": { - "inputs": { - "cachix": "cachix_3", - "crate2nix_stable": "crate2nix_stable_3", - "devshell": "devshell", - "flake-compat": "flake-compat", - "flake-parts": "flake-parts", - "nix-test-runner": "nix-test-runner", - "nixpkgs": "nixpkgs_4", - "pre-commit-hooks": "pre-commit-hooks" - }, - "locked": { - "lastModified": 1712821484, - "narHash": "sha256-rGT3CW64cJS9nlnWPFWSc1iEa3dNZecVVuPVGzcsHe8=", - "owner": "nix-community", - "repo": "crate2nix", - "rev": "42883afcad3823fa5811e967fb7bff54bc3c9d6d", - "type": "github" - }, - "original": { - "owner": "nix-community", - "ref": "0.14.0", - "repo": "crate2nix", - "type": "github" - } - }, - "crate2nix_stable_3": { - "inputs": { - "flake-utils": "flake-utils" - }, - "locked": { - "lastModified": 1702842982, - "narHash": "sha256-A9AowkHIjsy1a4LuiPiVP88FMxyCWK41flZEZOUuwQM=", - "owner": "nix-community", - "repo": "crate2nix", - "rev": "75ac2973affa6b9b4f661a7b592cba6e4f51d426", - "type": "github" - }, - "original": { - "owner": "nix-community", - "ref": "0.12.0", - "repo": "crate2nix", - "type": "github" - } - }, - "devshell": { - "inputs": { - "flake-utils": "flake-utils_2", - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1717408969, - "narHash": "sha256-Q0OEFqe35fZbbRPPRdrjTUUChKVhhWXz3T9ZSKmaoVY=", - "owner": "numtide", - "repo": "devshell", - "rev": "1ebbe68d57457c8cae98145410b164b5477761f4", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "devshell", - "type": "github" - } - }, - "devshell_2": { - "inputs": { - "flake-utils": "flake-utils_3", - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1717408969, - "narHash": "sha256-Q0OEFqe35fZbbRPPRdrjTUUChKVhhWXz3T9ZSKmaoVY=", - "owner": "numtide", - "repo": "devshell", - "rev": "1ebbe68d57457c8cae98145410b164b5477761f4", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "devshell", - "type": "github" - } - }, - "devshell_3": { - "inputs": { - "flake-utils": "flake-utils_4", - "nixpkgs": [ - "crate2nix", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1711099426, - "narHash": "sha256-HzpgM/wc3aqpnHJJ2oDqPBkNsqWbW0WfWUO8lKu8nGk=", - "owner": "numtide", - "repo": "devshell", - "rev": "2d45b54ca4a183f2fdcf4b19c895b64fbf620ee8", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "devshell", - "type": "github" - } - }, - "flake-compat": { - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "revCount": 57, - "type": "tarball", - "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.0.1/018afb31-abd1-7bff-a5e4-cff7e18efb7a/source.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://flakehub.com/f/edolstra/flake-compat/1.tar.gz" - } - }, - "flake-compat_2": { - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "revCount": 57, - "type": "tarball", - "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.0.1/018afb31-abd1-7bff-a5e4-cff7e18efb7a/source.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://flakehub.com/f/edolstra/flake-compat/1.tar.gz" - } - }, - "flake-compat_3": { - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "revCount": 57, - "type": "tarball", - "url": "https://api.flakehub.com/f/pinned/edolstra/flake-compat/1.0.1/018afb31-abd1-7bff-a5e4-cff7e18efb7a/source.tar.gz" - }, - "original": { - "type": "tarball", - "url": "https://flakehub.com/f/edolstra/flake-compat/1.tar.gz" - } - }, - "flake-compat_4": { - "locked": { - "lastModified": 1696426674, - "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", - "owner": "edolstra", - "repo": "flake-compat", - "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", - "type": "github" - }, - "original": { - "owner": "edolstra", - "repo": "flake-compat", - "type": "github" - } - }, - "flake-parts": { - "inputs": { - "nixpkgs-lib": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1719745305, - "narHash": "sha256-xwgjVUpqSviudEkpQnioeez1Uo2wzrsMaJKJClh+Bls=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "c3c5ecc05edc7dafba779c6c1a61cd08ac6583e9", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "flake-parts", - "type": "github" - } - }, - "flake-parts_2": { - "inputs": { - "nixpkgs-lib": [ - "crate2nix", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1719745305, - "narHash": "sha256-xwgjVUpqSviudEkpQnioeez1Uo2wzrsMaJKJClh+Bls=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "c3c5ecc05edc7dafba779c6c1a61cd08ac6583e9", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "flake-parts", - "type": "github" - } - }, - "flake-parts_3": { - "inputs": { - "nixpkgs-lib": [ - "crate2nix", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1712014858, - "narHash": "sha256-sB4SWl2lX95bExY2gMFG5HIzvva5AVMJd4Igm+GpZNw=", - "owner": "hercules-ci", - "repo": "flake-parts", - "rev": "9126214d0a59633752a136528f5f3b9aa8565b7d", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "flake-parts", - "type": "github" - } - }, - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_2": { - "inputs": { - "systems": "systems_2" - }, - "locked": { - "lastModified": 1701680307, - "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_3": { - "inputs": { - "systems": "systems_3" - }, - "locked": { - "lastModified": 1701680307, - "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_4": { - "inputs": { - "systems": "systems_4" - }, - "locked": { - "lastModified": 1701680307, - "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_5": { - "inputs": { - "systems": "systems_5" - }, - "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "flake-utils_6": { - "inputs": { - "systems": "systems_6" - }, - "locked": { - "lastModified": 1710146030, - "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, - "gitignore": { - "inputs": { - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "pre-commit-hooks", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1709087332, - "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, - "gitignore_2": { - "inputs": { - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "pre-commit-hooks", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1709087332, - "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, - "gitignore_3": { - "inputs": { - "nixpkgs": [ - "crate2nix", - "pre-commit-hooks", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1709087332, - "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", - "owner": "hercules-ci", - "repo": "gitignore.nix", - "rev": "637db329424fd7e46cf4185293b9cc8c88c95394", - "type": "github" - }, - "original": { - "owner": "hercules-ci", - "repo": "gitignore.nix", - "type": "github" - } - }, - "nix-filter": { - "locked": { - "lastModified": 1710156097, - "narHash": "sha256-1Wvk8UP7PXdf8bCCaEoMnOT1qe5/Duqgj+rL8sRQsSM=", - "owner": "numtide", - "repo": "nix-filter", - "rev": "3342559a24e85fc164b295c3444e8a139924675b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "nix-filter", - "type": "github" - } - }, - "nix-test-runner": { - "flake": false, - "locked": { - "lastModified": 1588761593, - "narHash": "sha256-FKJykltAN/g3eIceJl4SfDnnyuH2jHImhMrXS2KvGIs=", - "owner": "stoeffel", - "repo": "nix-test-runner", - "rev": "c45d45b11ecef3eb9d834c3b6304c05c49b06ca2", - "type": "github" - }, - "original": { - "owner": "stoeffel", - "repo": "nix-test-runner", - "type": "github" - } - }, - "nix-test-runner_2": { - "flake": false, - "locked": { - "lastModified": 1588761593, - "narHash": "sha256-FKJykltAN/g3eIceJl4SfDnnyuH2jHImhMrXS2KvGIs=", - "owner": "stoeffel", - "repo": "nix-test-runner", - "rev": "c45d45b11ecef3eb9d834c3b6304c05c49b06ca2", - "type": "github" - }, - "original": { - "owner": "stoeffel", - "repo": "nix-test-runner", - "type": "github" - } - }, - "nix-test-runner_3": { - "flake": false, - "locked": { - "lastModified": 1588761593, - "narHash": "sha256-FKJykltAN/g3eIceJl4SfDnnyuH2jHImhMrXS2KvGIs=", - "owner": "stoeffel", - "repo": "nix-test-runner", - "rev": "c45d45b11ecef3eb9d834c3b6304c05c49b06ca2", - "type": "github" - }, - "original": { - "owner": "stoeffel", - "repo": "nix-test-runner", - "type": "github" - } - }, - "nixpkgs": { - "locked": { - "lastModified": 1700612854, - "narHash": "sha256-yrQ8osMD+vDLGFX7pcwsY/Qr5PUd6OmDMYJZzZi0+zc=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "19cbff58383a4ae384dea4d1d0c823d72b49d614", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_2": { - "locked": { - "lastModified": 1715534503, - "narHash": "sha256-5ZSVkFadZbFP1THataCaSf0JH2cAH3S29hU9rrxTEqk=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "2057814051972fa1453ddfb0d98badbea9b83c06", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_3": { - "locked": { - "lastModified": 1715534503, - "narHash": "sha256-5ZSVkFadZbFP1THataCaSf0JH2cAH3S29hU9rrxTEqk=", - "owner": "NixOS", - "repo": "nixpkgs", - "rev": "2057814051972fa1453ddfb0d98badbea9b83c06", - "type": "github" - }, - "original": { - "owner": "NixOS", - "ref": "nixos-unstable", - "repo": "nixpkgs", - "type": "github" - } - }, - "nixpkgs_4": { - "locked": { - "lastModified": 1719506693, - "narHash": "sha256-C8e9S7RzshSdHB7L+v9I51af1gDM5unhJ2xO1ywxNH8=", - "path": "/nix/store/4p0avw1s3vf27hspgqsrqs37gxk4i83i-source", - "rev": "b2852eb9365c6de48ffb0dc2c9562591f652242a", - "type": "path" - }, - "original": { - "id": "nixpkgs", - "type": "indirect" - } - }, - "nixpkgs_5": { - "locked": { - "lastModified": 1719506693, - "narHash": "sha256-C8e9S7RzshSdHB7L+v9I51af1gDM5unhJ2xO1ywxNH8=", - "path": "/nix/store/4p0avw1s3vf27hspgqsrqs37gxk4i83i-source", - "rev": "b2852eb9365c6de48ffb0dc2c9562591f652242a", - "type": "path" - }, - "original": { - "id": "nixpkgs", - "type": "indirect" - } - }, - "nixpkgs_6": { - "locked": { - "lastModified": 1723912943, - "narHash": "sha256-39F9GzyhxYcY3wTeKuEFWRJWcrGBosO4nf4xzMTWZX8=", - "owner": "danieldk", - "repo": "nixpkgs", - "rev": "b82cdca86dbb30013b76c4b55d48806476820a5c", - "type": "github" - }, - "original": { - "owner": "danieldk", - "ref": "cuda-12.4", - "repo": "nixpkgs", - "type": "github" - } - }, - "pre-commit-hooks": { - "inputs": { - "flake-compat": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "flake-compat" - ], - "gitignore": "gitignore", - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "nixpkgs" - ], - "nixpkgs-stable": [ - "crate2nix", - "crate2nix_stable", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1719259945, - "narHash": "sha256-F1h+XIsGKT9TkGO3omxDLEb/9jOOsI6NnzsXFsZhry4=", - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "rev": "0ff4381bbb8f7a52ca4a851660fc7a437a4c6e07", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "type": "github" - } - }, - "pre-commit-hooks_2": { - "inputs": { - "flake-compat": [ - "crate2nix", - "crate2nix_stable", - "flake-compat" - ], - "gitignore": "gitignore_2", - "nixpkgs": [ - "crate2nix", - "crate2nix_stable", - "nixpkgs" - ], - "nixpkgs-stable": [ - "crate2nix", - "crate2nix_stable", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1719259945, - "narHash": "sha256-F1h+XIsGKT9TkGO3omxDLEb/9jOOsI6NnzsXFsZhry4=", - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "rev": "0ff4381bbb8f7a52ca4a851660fc7a437a4c6e07", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "type": "github" - } - }, - "pre-commit-hooks_3": { - "inputs": { - "flake-compat": [ - "crate2nix", - "flake-compat" - ], - "flake-utils": "flake-utils_5", - "gitignore": "gitignore_3", - "nixpkgs": [ - "crate2nix", - "nixpkgs" - ], - "nixpkgs-stable": [ - "crate2nix", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1712055707, - "narHash": "sha256-4XLvuSIDZJGS17xEwSrNuJLL7UjDYKGJSbK1WWX2AK8=", - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "rev": "e35aed5fda3cc79f88ed7f1795021e559582093a", - "type": "github" - }, - "original": { - "owner": "cachix", - "repo": "pre-commit-hooks.nix", - "type": "github" - } - }, - "root": { - "inputs": { - "crate2nix": "crate2nix", - "flake-utils": "flake-utils_6", - "nix-filter": "nix-filter", - "nixpkgs": [ - "tgi-nix", - "nixpkgs" - ], - "rust-overlay": "rust-overlay", - "tgi-nix": "tgi-nix" - } - }, - "rust-overlay": { - "inputs": { - "nixpkgs": [ - "tgi-nix", - "nixpkgs" - ] - }, - "locked": { - "lastModified": 1724206841, - "narHash": "sha256-L8dKaX4T3k+TR2fEHCfGbH4UXdspovz/pj87iai9qmc=", - "owner": "oxalica", - "repo": "rust-overlay", - "rev": "45e98fbd62c32e5927e952d2833fa1ba4fb35a61", - "type": "github" - }, - "original": { - "owner": "oxalica", - "repo": "rust-overlay", - "type": "github" - } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_2": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_3": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_4": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_5": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "systems_6": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } - }, - "tgi-nix": { - "inputs": { - "flake-compat": "flake-compat_4", - "nixpkgs": "nixpkgs_6" - }, - "locked": { - "lastModified": 1724218652, - "narHash": "sha256-Y7Kt+AZRIdo7tr/VhKGzdwYf7stiYQ4JD7flusEpXQw=", - "owner": "danieldk", - "repo": "tgi-nix", - "rev": "ab2761aa7b970e737492b8cc41ca580dcb094808", - "type": "github" - }, - "original": { - "owner": "danieldk", - "repo": "tgi-nix", - "type": "github" - } - } - }, - "root": "root", - "version": 7 -} diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 9a90a673..1f47475b 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -31,6 +31,10 @@ struct RawConfig { model_type: Option, max_seq_len: Option, quantization_config: Option, + n_embd: Option, + hidden_size: Option, + num_attention_heads: Option, + head_dim: Option, } #[derive(Deserialize)] @@ -42,6 +46,7 @@ struct QuantizationConfig { struct Config { max_position_embeddings: Option, quantize: Option, + head_dim: Option, } impl From for Config { @@ -51,9 +56,26 @@ impl From for Config { .or(other.max_seq_len) .or(other.n_positions); let quantize = other.quantization_config.and_then(|q| q.quant_method); + let head_dim = other.head_dim.or_else(|| { + match (other.hidden_size, other.n_embd, other.num_attention_heads) { + (Some(hidden_size), _, Some(num_attention_heads)) + if hidden_size % num_attention_heads == 0 => + { + Some(hidden_size / num_attention_heads) + } + // Legacy + (_, Some(hidden_size), Some(num_attention_heads)) + if hidden_size % num_attention_heads == 0 => + { + Some(hidden_size / num_attention_heads) + } + _ => None, + } + }); Config { max_position_embeddings, quantize, + head_dim, } } } @@ -1475,6 +1497,16 @@ fn main() -> Result<(), LauncherError> { std::env::set_var("ATTENTION", "flashdecoding"); } let config: Config = config.into(); + match config.head_dim { + Some(h) if h == 64 || h == 128 || h == 256 => { + // std::env::set_var("ATTENTION", "flashdecoding"); + } + _ => { + tracing::info!("Forcing flash decoding because head dim is not supported by flashinfer, also disabling prefix caching"); + std::env::set_var("USE_PREFIX_CACHING", "0"); + std::env::set_var("ATTENTION", "flashdecoding"); + } + } let quantize = config.quantize; // Quantization usually means you're even more RAM constrained.