diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py index f6ffc89d0..0c702c636 100644 --- a/clients/python/tests/test_client.py +++ b/clients/python/tests/test_client.py @@ -2,7 +2,7 @@ import pytest from text_generation import Client, AsyncClient from text_generation.errors import NotFoundError, ValidationError -from text_generation.types import FinishReason, InputToken +from text_generation.types import FinishReason def test_generate(llama_7b_url, hf_headers): diff --git a/flake.lock b/flake.lock index 5d6ee4635..512625de4 100644 --- a/flake.lock +++ b/flake.lock @@ -978,16 +978,16 @@ "nixpkgs": "nixpkgs_6" }, "locked": { - "lastModified": 1739803255, - "narHash": "sha256-lreIfcjSt6D0wOuZ6jm3WEBYvYvED63T+pOKmOgBLi8=", + "lastModified": 1740036032, + "narHash": "sha256-nqo3U8uNlFIgrOz8wCfgk08Oi+RzQxxFDPipeVMyM/E=", "owner": "huggingface", "repo": "text-generation-inference-nix", - "rev": "30ab7423277fc93c8fc0ca4df737478ebfdb8eec", + "rev": "e9fb0e818a7e9a54cdab8d9c7c0cef5037fe084a", "type": "github" }, "original": { "owner": "huggingface", - "ref": "eetq-0.0.1", + "ref": "flashinfer-0.2.0.post2", "repo": "text-generation-inference-nix", "type": "github" } diff --git a/flake.nix b/flake.nix index 5ba501141..6068dc5f7 100644 --- a/flake.nix +++ b/flake.nix @@ -5,7 +5,7 @@ inputs.nixpkgs.follows = "tgi-nix/nixpkgs"; }; nix-filter.url = "github:numtide/nix-filter"; - tgi-nix.url = "github:huggingface/text-generation-inference-nix/eetq-0.0.1"; + tgi-nix.url = "github:huggingface/text-generation-inference-nix/flashinfer-0.2.0.post2"; nixpkgs.follows = "tgi-nix/nixpkgs"; flake-utils.url = "github:numtide/flake-utils"; rust-overlay = { diff --git a/server/Makefile-flashinfer b/server/Makefile-flashinfer index d095d841d..f311a6569 100644 --- a/server/Makefile-flashinfer +++ b/server/Makefile-flashinfer @@ -3,4 +3,4 @@ install-flashinfer: # `pip install flashinfer` cannot resolve it. uv pip install fsspec sympy==1.13.1 numpy uv pip install -U setuptools - TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0+PTX" FLASHINFER_ENABLE_AOT=1 pip install git+https://github.com/flashinfer-ai/flashinfer.git@v0.2.0.post1#egg=flashinfer --no-build-isolation + TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;8.9;9.0+PTX" FLASHINFER_ENABLE_AOT=1 pip install git+https://github.com/flashinfer-ai/flashinfer.git@v0.2.0.post2#egg=flashinfer-python --no-build-isolation