Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 12:24:53 +00:00)
Revert "Modifying this should cache hit."
This reverts commit 46a2bde108.
This commit is contained in:
parent 46a2bde108
commit c44511220d
@ -3,6 +3,7 @@ pub mod config;
|
|||||||
pub mod infer;
|
pub mod infer;
|
||||||
pub mod server;
|
pub mod server;
|
||||||
pub mod validation;
|
pub mod validation;
|
||||||
|
// Dummy change to trigger CI, TODO remove this.
|
||||||
|
|
||||||
#[cfg(feature = "kserve")]
|
#[cfg(feature = "kserve")]
|
||||||
mod kserve;
|
mod kserve;
|
||||||
|
@ -1 +0,0 @@
|
|||||||
3.12
|
|
@ -1,6 +0,0 @@
|
|||||||
def main():
|
|
||||||
print("Hello from server!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
@ -1,98 +0,0 @@
|
|||||||
[tool.poetry]
|
|
||||||
name = "text-generation-server"
|
|
||||||
version = "2.0.5-dev0"
|
|
||||||
description = "Text Generation Inference Python gRPC Server"
|
|
||||||
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
|
||||||
|
|
||||||
[tool.poetry.scripts]
|
|
||||||
text-generation-server = 'text_generation_server.cli:app'
|
|
||||||
|
|
||||||
[dependencies]
|
|
||||||
python = ">=3.9,<3.13"
|
|
||||||
protobuf = ">=4.25.3,<6"
|
|
||||||
grpcio = "^1.51.1"
|
|
||||||
grpcio-status = "^1.51.1"
|
|
||||||
grpcio-reflection = "^1.51.1"
|
|
||||||
grpc-interceptor = "^0.15.4"
|
|
||||||
typer = "^0.12.5"
|
|
||||||
accelerate = {version = "^1.1.0", optional = true}
|
|
||||||
bitsandbytes = { version = "^0.43.0", optional = true }
|
|
||||||
safetensors = "^0.4.5"
|
|
||||||
loguru = "^0.7.2"
|
|
||||||
opentelemetry-api = "^1.27.0"
|
|
||||||
opentelemetry-exporter-otlp = "^1.27.0"
|
|
||||||
opentelemetry-instrumentation-grpc = "^0.48b0"
|
|
||||||
hf-transfer = "^0.1.2"
|
|
||||||
sentencepiece = "^0.2.0"
|
|
||||||
tokenizers = "^0.20.3"
|
|
||||||
huggingface-hub = "^0.23"
|
|
||||||
transformers = "^4.46.2"
|
|
||||||
einops = "^0.8.0"
|
|
||||||
texttable = { version = "^1.6.7", optional = true }
|
|
||||||
datasets = {version = "^2.21.0", optional = true}
|
|
||||||
peft = {version = "^0.13.2", optional = true}
|
|
||||||
torch = {version = "^2.4.1", optional = true}
|
|
||||||
scipy = "^1.13.1"
|
|
||||||
pillow = "^11.0.0"
|
|
||||||
outlines= {version = "^0.1.3", optional = true}
|
|
||||||
prometheus-client = ">=0.20.0,<0.22"
|
|
||||||
py-cpuinfo = "^9.0.0"
|
|
||||||
compressed-tensors = {version = "^0.7.1", optional = true}
|
|
||||||
# Remove later, temporary workaround for outlines.
|
|
||||||
numpy = "^1.26.4"
|
|
||||||
|
|
||||||
attention-kernels = [
|
|
||||||
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
|
||||||
]
|
|
||||||
marlin-kernels = [
|
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
|
||||||
]
|
|
||||||
moe-kernels = [
|
|
||||||
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
|
|
||||||
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
|
|
||||||
]
|
|
||||||
rich = "^13.8.1"
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
|
||||||
torch = ["torch"]
|
|
||||||
accelerate = ["accelerate"]
|
|
||||||
attention = ["attention-kernels"]
|
|
||||||
bnb = ["bitsandbytes"]
|
|
||||||
compressed-tensors = ["compressed-tensors"]
|
|
||||||
marlin = ["marlin-kernels"]
|
|
||||||
moe = ["moe-kernels"]
|
|
||||||
peft = ["peft"]
|
|
||||||
quantize = ["texttable", "datasets", "accelerate"]
|
|
||||||
outlines = ["outlines"]
|
|
||||||
|
|
||||||
[dependency-groups]
|
|
||||||
dev = [
|
|
||||||
"grpcio-tools>=1.51.1,<2.0",
|
|
||||||
"pytest>=7.3.0,<8"
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
[[tool.poetry.source]]
|
|
||||||
name = "pytorch-gpu-src"
|
|
||||||
url = "https://download.pytorch.org/whl/cu121"
|
|
||||||
priority = "explicit"
|
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = [
|
|
||||||
"poetry-core>=1.0.0",
|
|
||||||
]
|
|
||||||
build-backend = "poetry.core.masonry.api"
|
|
||||||
|
|
||||||
[tool.isort]
|
|
||||||
profile = "black"
|
|
@ -9,8 +9,6 @@ from enum import Enum
|
|||||||
from huggingface_hub import hf_hub_download
|
from huggingface_hub import hf_hub_download
|
||||||
from text_generation_server.utils.adapter import parse_lora_adapters
|
from text_generation_server.utils.adapter import parse_lora_adapters
|
||||||
|
|
||||||
# Dummy change should cache hit.
|
|
||||||
|
|
||||||
|
|
||||||
app = typer.Typer()
|
app = typer.Typer()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user