Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-08 19:04:52 +00:00
chore: prepare version 3.3.5 (#3314)
* chore: prepare version 3.3.5
* black
* neuron: black
* Update hf-xet in uv lockfile
* Attempt to fix API doc check failure: add `error_type` where missing.
* Pin redocly version
* Sync redocly with Nix for now

Co-authored-by: Daniël de Kok <me@danieldk.eu>
This commit is contained in:
parent 06d9d88b95
commit 0f79162288
.github/workflows/autodocs.yaml (vendored): 2 changes
@@ -41,5 +41,5 @@ jobs:
       - name: Check that documentation is up-to-date
         run: |
-          npm install -g @redocly/cli
+          npm install -g @redocly/cli@1.34.2
           python update_doc.py --check
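The workflow now pins the Redocly CLI so CI and local runs resolve the same linter release. Reproducing the check locally might look like this (a sketch, assuming a repo checkout with `update_doc.py` at the root, as the workflow step implies):

```bash
# Mirror the CI step above: install the pinned CLI, then verify the API docs.
npm install -g @redocly/cli@1.34.2
redocly --version            # expect 1.34.2, matching the pin
python update_doc.py --check
```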
Cargo.lock (generated): 16 changes
@@ -4650,7 +4650,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-backends-trtllm"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "async-trait",
  "clap 4.5.32",
@@ -4671,7 +4671,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-benchmark"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "average",
  "clap 4.5.32",
@@ -4691,7 +4691,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-client"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "async-trait",
  "base64 0.22.1",
@@ -4709,7 +4709,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-launcher"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "clap 4.5.32",
  "ctrlc",
@@ -4730,7 +4730,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "anyhow",
  "async-stream",
@@ -4782,7 +4782,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router-llamacpp"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "async-trait",
  "bindgen 0.71.1",
@@ -4800,7 +4800,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router-v2"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
@@ -4849,7 +4849,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router-v3"
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
Cargo.toml

@@ -21,7 +21,7 @@ default-members = [
 resolver = "2"
 
 [workspace.package]
-version = "3.3.4-dev0"
+version = "3.3.5-dev0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
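With the version declared once under `[workspace.package]`, every member crate inherits the bump. One way to confirm it propagated (a sketch; assumes `jq` is available):

```bash
# List workspace crate versions; each text-generation-* crate should now
# report 3.3.5-dev0.
cargo metadata --no-deps --format-version 1 \
    | jq -r '.packages[] | "\(.name) \(.version)"' \
    | grep '^text-generation'
```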
README.md

@@ -84,7 +84,7 @@ model=HuggingFaceH4/zephyr-7b-beta
 volume=$PWD/data
 
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4 --model-id $model
+    ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model
 ```
 
 And then you can make requests like
@@ -121,7 +121,7 @@ curl localhost:8080/v1/chat/completions \
 
 **Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
 
-**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/installation_amd#using-tgi-with-amd-gpus). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.4-rocm --model-id $model` instead of the command above.
+**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/installation_amd#using-tgi-with-amd-gpus). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5-rocm --model-id $model` instead of the command above.
 
 To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
 ```
@@ -152,7 +152,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 token=<your cli READ token>
 
 docker run --gpus all --shm-size 1g -e HF_TOKEN=$token -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4 --model-id $model
+    ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model
 ```
 
 ### A note on Shared Memory (shm)
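For context, the request the README points at looks roughly like this (a sketch of the OpenAI-compatible endpoint named in the hunk header above; the payload is illustrative):

```bash
curl localhost:8080/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
      "model": "tgi",
      "messages": [{"role": "user", "content": "What is deep learning?"}],
      "stream": false,
      "max_tokens": 20
    }'
```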
@@ -19,7 +19,7 @@ docker run -p 8080:80 \
    --ipc=host \
    -v $volume:/data \
    -e HF_TOKEN=$hf_token \
-   ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
+   ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
    --model-id $model \
    --max-input-tokens 1024 --max-total-tokens 2048 \
    --max-batch-prefill-tokens 2048 --max-batch-size 32 \
@@ -39,7 +39,7 @@ docker run -p 8080:80 \
    --ipc=host \
    -v $volume:/data \
    -e HF_TOKEN=$hf_token \
-   ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
+   ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
    --model-id $model \
    --sharded true --num-shard 8 \
    --max-input-tokens 1024 --max-total-tokens 2048 \
@@ -58,7 +58,7 @@ docker run -p 8080:80 \
    --cap-add=sys_nice \
    --ipc=host \
    -v $volume:/data \
-   ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
+   ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
    --model-id $model \
    --max-input-tokens 4096 --max-batch-prefill-tokens 16384 \
    --max-total-tokens 8192 --max-batch-size 4
@@ -81,7 +81,7 @@ docker run -p 8080:80 \
    --ipc=host \
    -v $volume:/data \
    -e HF_TOKEN=$hf_token \
-   ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
+   ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
    --model-id $model \
    --kv-cache-dtype fp8_e4m3fn \
    --max-input-tokens 1024 --max-total-tokens 2048 \
@@ -102,7 +102,7 @@ docker run -p 8080:80 \
    --ipc=host \
    -v $volume:/data \
    -e HF_TOKEN=$hf_token \
-   ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
+   ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
    --model-id $model \
    --kv-cache-dtype fp8_e4m3fn \
    --sharded true --num-shard 8 \
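Once any of the Gaudi containers above is up, a quick smoke test against TGI's standard `/generate` route (prompt and parameters here are illustrative):

```bash
curl 127.0.0.1:8080/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "What is Deep Learning?", "parameters": {"max_new_tokens": 20}}'
```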
@@ -56,6 +56,7 @@ def _test_prefill(config_name, generator, batch_size, do_sample):
     assert tokens.ids[0] == expectations[0]
     assert tokens.texts[0] == expectations[1]
 
+
 def test_prefill_truncate(neuron_model_config):
     config_name = neuron_model_config["name"]
     neuron_model_path = neuron_model_config["neuron_model_path"]
docs/openapi.json

@@ -10,7 +10,7 @@
     "name": "Apache 2.0",
     "url": "https://www.apache.org/licenses/LICENSE-2.0"
   },
-  "version": "3.3.4-dev0"
+  "version": "3.3.5-dev0"
 },
 "paths": {
   "/": {
@@ -57,7 +57,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Input validation error"
+          "error": "Input validation error",
+          "error_type": "validation"
         }
       }
     }
@@ -70,7 +71,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Request failed during generation"
+          "error": "Request failed during generation",
+          "error_type": "generation"
         }
       }
     }
@@ -83,7 +85,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Model is overloaded"
+          "error": "Model is overloaded",
+          "error_type": "overloaded"
        }
       }
     }
@@ -96,7 +99,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Incomplete generation"
+          "error": "Incomplete generation",
+          "error_type": "incomplete_generation"
         }
       }
     }
@@ -181,7 +185,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Input validation error"
+          "error": "Input validation error",
+          "error_type": "validation"
         }
       }
     }
@@ -194,7 +199,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Request failed during generation"
+          "error": "Request failed during generation",
+          "error_type": "generation"
         }
       }
     }
@@ -207,7 +213,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Model is overloaded"
+          "error": "Model is overloaded",
+          "error_type": "overloaded"
         }
       }
     }
@@ -220,7 +227,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Incomplete generation"
+          "error": "Incomplete generation",
+          "error_type": "incomplete_generation"
         }
       }
     }
@@ -264,7 +272,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Input validation error"
+          "error": "Input validation error",
+          "error_type": "validation"
         }
       }
     }
@@ -277,7 +286,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Request failed during generation"
+          "error": "Request failed during generation",
+          "error_type": "generation"
         }
       }
     }
@@ -290,7 +300,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Model is overloaded"
+          "error": "Model is overloaded",
+          "error_type": "overloaded"
         }
       }
     }
@@ -303,7 +314,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Incomplete generation"
+          "error": "Incomplete generation",
+          "error_type": "incomplete_generation"
         }
       }
     }
@@ -558,7 +570,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Input validation error"
+          "error": "Input validation error",
+          "error_type": "validation"
         }
       }
     }
@@ -571,7 +584,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Request failed during generation"
+          "error": "Request failed during generation",
+          "error_type": "generation"
         }
       }
     }
@@ -584,7 +598,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Model is overloaded"
+          "error": "Model is overloaded",
+          "error_type": "overloaded"
         }
       }
     }
@@ -597,7 +612,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Incomplete generation"
+          "error": "Incomplete generation",
+          "error_type": "incomplete_generation"
         }
       }
     }
@@ -646,7 +662,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Input validation error"
+          "error": "Input validation error",
+          "error_type": "validation"
         }
       }
     }
@@ -659,7 +676,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Request failed during generation"
+          "error": "Request failed during generation",
+          "error_type": "generation"
         }
       }
     }
@@ -672,7 +690,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Model is overloaded"
+          "error": "Model is overloaded",
+          "error_type": "overloaded"
         }
       }
     }
@@ -685,7 +704,8 @@
           "$ref": "#/components/schemas/ErrorResponse"
         },
         "example": {
-          "error": "Incomplete generation"
+          "error": "Incomplete generation",
+          "error_type": "incomplete_generation"
         }
       }
     }
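The `error_type` additions bring the OpenAPI examples in line with the error bodies the router already returns: a human-readable `error` plus a machine-readable `error_type` discriminator. A hedged way to see one in practice (the exact message text depends on the model and validation path):

```bash
# Send a request likely to fail validation and inspect the body; per the
# schema above it should carry both fields,
# e.g. {"error": "...", "error_type": "validation"}.
curl -s localhost:8080/generate \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{"inputs": "Hello", "parameters": {"max_new_tokens": 0}}'
```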
@ -20,7 +20,7 @@ hf_token=YOUR_HF_ACCESS_TOKEN
|
||||
|
||||
docker run --runtime=habana --cap-add=sys_nice --ipc=host \
|
||||
-p 8080:80 -v $volume:/data -e HF_TOKEN=$hf_token \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
|
||||
--model-id $model
|
||||
```
|
||||
|
||||
@ -52,7 +52,7 @@ hf_token=YOUR_ACCESS_TOKEN
|
||||
|
||||
docker run --runtime=habana --cap-add=sys_nice --ipc=host \
|
||||
-p 8080:80 -v $volume:/data -e HF_TOKEN=$hf_token \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
|
||||
--model-id $model
|
||||
<text-generation-inference-launcher-arguments>
|
||||
```
|
||||
@ -106,7 +106,7 @@ docker run -p 8080:80 \
|
||||
--cap-add=sys_nice \
|
||||
--ipc=host \
|
||||
-v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.4-gaudi \
|
||||
ghcr.io/huggingface/text-generation-inference:3.3.5-gaudi \
|
||||
--model-id $model \
|
||||
--max-input-tokens 4096 --max-batch-prefill-tokens 16384 \
|
||||
--max-total-tokens 8192 --max-batch-size 4
|
||||
|
@@ -31,7 +31,7 @@ deployment instructions in the model card:
 The service is launched simply by running the text-generation-inference container with two sets of parameters:
 
 ```
-docker run <system_parameters> ghcr.io/huggingface/text-generation-inference:3.3.4-neuron <service_parameters>
+docker run <system_parameters> ghcr.io/huggingface/text-generation-inference:3.3.5-neuron <service_parameters>
 ```
 
 - system parameters are used to map ports, volumes and devices between the host and the service,
|
||||
--shm-size 1g \
|
||||
-e HF_TOKEN=$token \
|
||||
-p 8080:80 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.4 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5 \
|
||||
--model-id $model
|
||||
```
|
||||
|
@@ -19,7 +19,7 @@ bitsandbytes is a library used to apply 8-bit and 4-bit quantization to models.
 In TGI, you can use 8-bit quantization by adding `--quantize bitsandbytes` like below 👇
 
 ```bash
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.4 --model-id $model --quantize bitsandbytes
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model --quantize bitsandbytes
 ```
 
 4-bit quantization is also possible with bitsandbytes. You can choose one of the following 4-bit data types: 4-bit float (`fp4`), or 4-bit `NormalFloat` (`nf4`). These data types were introduced in the context of parameter-efficient fine-tuning, but you can apply them for inference by automatically converting the model weights on load.
@@ -27,7 +27,7 @@ docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingf
 In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇
 
 ```bash
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.4 --model-id $model --quantize bitsandbytes-nf4
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model --quantize bitsandbytes-nf4
 ```
 
 You can get more information about 8-bit quantization by reading this [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), and 4-bit quantization by reading [this blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes).
@@ -48,7 +48,7 @@ $$({\hat{W}_{l}}^{*} = argmin_{\hat{W_{l}}} ||W_{l}X-\hat{W}_{l}X||^{2}_{2})$$
 TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using quantization script. You can run a quantized model by simply passing --quantize like below 👇
 
 ```bash
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.4 --model-id $model --quantize gptq
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model --quantize gptq
 ```
 
 Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI.
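For completeness, the `fp4` variant mentioned in the text uses the same invocation with the other 4-bit flag:

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.3.5 --model-id $model --quantize bitsandbytes-fp4
```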
@@ -11,7 +11,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
     --device=/dev/kfd --device=/dev/dri --group-add video \
     --ipc=host --shm-size 256g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4-rocm \
+    ghcr.io/huggingface/text-generation-inference:3.3.5-rocm \
     --model-id $model
 ```
@@ -12,7 +12,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm --privileged --cap-add=sys_nice \
     --device=/dev/dri \
     --ipc=host --shm-size 1g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4-intel-xpu \
+    ghcr.io/huggingface/text-generation-inference:3.3.5-intel-xpu \
     --model-id $model --cuda-graphs 0
 ```
@@ -29,7 +29,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
 docker run --rm --privileged --cap-add=sys_nice \
     --device=/dev/dri \
     --ipc=host --shm-size 1g --net host -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4-intel-cpu \
+    ghcr.io/huggingface/text-generation-inference:3.3.5-intel-cpu \
     --model-id $model --cuda-graphs 0
 ```
@@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
 docker run --gpus all --shm-size 64g -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4 \
+    ghcr.io/huggingface/text-generation-inference:3.3.5 \
     --model-id $model
 ```
@@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
-    ghcr.io/huggingface/text-generation-inference:3.3.4 \
+    ghcr.io/huggingface/text-generation-inference:3.3.5 \
     --model-id $model
 ```
@@ -96,7 +96,7 @@ curl 127.0.0.1:8080/generate \
 To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
 
 ```bash
-docker run ghcr.io/huggingface/text-generation-inference:3.3.4 --help
+docker run ghcr.io/huggingface/text-generation-inference:3.3.5 --help
 ```
 
 </Tip>
@@ -163,7 +163,7 @@ hub = {
 
 # create Hugging Face Model Class
 huggingface_model = HuggingFaceModel(
-  image_uri=get_huggingface_llm_image_uri("huggingface",version="3.3.4"),
+  image_uri=get_huggingface_llm_image_uri("huggingface",version="3.3.5"),
   env=hub,
   role=role,
 )
@@ -17,7 +17,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 42,
     "prompt_tokens": 277,

@@ -17,7 +17,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 62,
     "prompt_tokens": 277,

@@ -17,7 +17,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 67,
     "prompt_tokens": 277,

@@ -17,7 +17,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 72,
     "prompt_tokens": 275,

@@ -17,7 +17,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 80,
     "prompt_tokens": 279,

@@ -14,7 +14,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 35,
     "prompt_tokens": 32,

@@ -14,7 +14,7 @@
   "id": "",
   "model": "google/gemma-3-4b-it",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 44,
     "prompt_tokens": 37,
@@ -18,7 +18,7 @@
   "id": "",
   "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 10,
     "prompt_tokens": 45,
@@ -44,7 +44,7 @@
   "id": "",
   "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 10,
     "prompt_tokens": 45,

@@ -17,7 +17,7 @@
   "id": "",
   "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
   "object": "chat.completion",
-  "system_fingerprint": "3.3.4-dev0-native",
+  "system_fingerprint": "3.3.5-dev0-native",
   "usage": {
     "completion_tokens": 10,
     "prompt_tokens": 45,
@@ -43,7 +43,10 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
         ],
         "seed": 42,
         "max_tokens": 500,
-        "response_format": {"type": "json_object", "value": Weather.model_json_schema()},
+        "response_format": {
+            "type": "json_object",
+            "value": Weather.model_json_schema(),
+        },
     }
     # send the request
     response = requests.post(
@@ -75,7 +78,11 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
 
     json_payload["response_format"] = {
         "type": "json_schema",
-        "value": {"name": "weather", "strict": True, "schema": Weather.model_json_schema()},
+        "value": {
+            "name": "weather",
+            "strict": True,
+            "schema": Weather.model_json_schema(),
+        },
     }
     response = requests.post(
         f"{llama_grammar.base_url}/v1/chat/completions",
@@ -119,7 +126,10 @@ async def test_grammar_response_format_llama_error_if_tools_not_installed(
         "seed": 42,
         "max_tokens": 500,
         "tools": [],
-        "response_format": {"type": "json_object", "value": Weather.model_json_schema()},
+        "response_format": {
+            "type": "json_object",
+            "value": Weather.model_json_schema(),
+        },
         },
     )
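On the wire the reformatted payloads are identical; an equivalent raw request for the first test might look like this (a sketch: the message text is invented, and the inline schema stands in for the full `Weather.model_json_schema()` output):

```bash
curl localhost:8080/v1/chat/completions \
    -X POST \
    -H 'Content-Type: application/json' \
    -d '{
      "messages": [{"role": "user", "content": "What is the weather like in Paris?"}],
      "seed": 42,
      "max_tokens": 500,
      "response_format": {
        "type": "json_object",
        "value": {"type": "object", "properties": {"temperature": {"type": "number"}}}
      }
    }'
```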
router/src/server.rs

@@ -113,13 +113,13 @@ content(
 ("text/event-stream" = StreamResponse),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"})),
+example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"})),
+example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"})),
+example = json ! ({"error": "Input validation error", "error_type": "validation"})),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"})),
+example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"})),
 )
 )]
 #[instrument(skip(infer, req))]
@@ -249,13 +249,13 @@ request_body = GenerateRequest,
 responses(
 (status = 200, description = "Generated Text", body = GenerateResponse),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"})),
+example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"})),
+example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"})),
+example = json ! ({"error": "Input validation error", "error_type": "validation"})),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"})),
+example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"})),
 )
 )]
 #[instrument(
@@ -448,16 +448,16 @@ responses(
 (status = 200, description = "Generated Text", body = StreamResponse,
 content_type = "text/event-stream"),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"}),
+example = json ! ({"error": "Request failed during generation", "error_type": "generation"}),
 content_type = "text/event-stream"),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"}),
+example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"}),
 content_type = "text/event-stream"),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"}),
+example = json ! ({"error": "Input validation error", "error_type": "validation"}),
 content_type = "text/event-stream"),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"}),
+example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"}),
 content_type = "text/event-stream"),
 )
 )]
@@ -691,13 +691,13 @@ content(
 ("text/event-stream" = Chunk),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"})),
+example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"})),
+example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"})),
+example = json ! ({"error": "Input validation error", "error_type": "validation"})),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"})),
+example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"})),
 )
 )]
 #[instrument(
@@ -1144,13 +1144,13 @@ content(
 ("text/event-stream" = ChatCompletionChunk),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"})),
+example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"})),
+example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"})),
+example = json ! ({"error": "Input validation error", "error_type": "validation"})),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"})),
+example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"})),
 )
 )]
 #[instrument(
@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics2 model."""
+"""PyTorch Idefics2 model."""
 
 from typing import List, Optional, Tuple
 

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics3 model."""
+"""PyTorch Idefics3 model."""
 
 from typing import List, Optional, Tuple
 

@@ -17,7 +17,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" Idefics model configuration"""
+"""Idefics model configuration"""
 import copy
 
 from transformers import PretrainedConfig

@@ -17,7 +17,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Idefics model."""
+"""PyTorch Idefics model."""
 from typing import List, Optional, Tuple, Union
 
 import torch

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object"""
+"""PyTorch IdeficsVision model: a copy of CLIPVisionModel using a simpler config object"""
 
 
 from dataclasses import dataclass

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch Llava-NeXT model."""
+"""PyTorch Llava-NeXT model."""
 
 from typing import List, Optional, Tuple
 

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch GPTNeoX model."""
+"""PyTorch GPTNeoX model."""
 
 from typing import Optional, Tuple, Union
 

@@ -12,7 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-""" PyTorch T5 model."""
+"""PyTorch T5 model."""
 
 import copy
 import math
@@ -9,7 +9,7 @@ import numpy as np
 
 
 def find_segments(
-    adapter_indices: Union[torch.Tensor, List[int]]
+    adapter_indices: Union[torch.Tensor, List[int]],
 ) -> Tuple[List[int], List[int]]:
     if isinstance(adapter_indices, torch.Tensor):
         adapter_indices = adapter_indices.cpu().numpy()
uv.lock

@@ -720,17 +720,17 @@ wheels = [
 
 [[package]]
 name = "hf-xet"
-version = "1.0.0"
+version = "1.1.9"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/64/46/db229dddc55121478105940b610fef1b466c414da02be9d4daa5602a2527/hf_xet-1.0.0.tar.gz", hash = "sha256:5e0ca891ce599fd753e7ffbdc182207d952a93e6252eeb92118475d6866bb093", size = 257192 }
+sdist = { url = "https://files.pythonhosted.org/packages/23/0f/5b60fc28ee7f8cc17a5114a584fd6b86e11c3e0a6e142a7f97a161e9640a/hf_xet-1.1.9.tar.gz", hash = "sha256:c99073ce404462e909f1d5839b2d14a3827b8fe75ed8aed551ba6609c026c803", size = 484242 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e7/0a/c16f8766fa3cd520292b1a765e9b50b8390bce4c2ed7657db9534551f5ed/hf_xet-1.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:6106304f92bbce7c9b8509f6f735f2e8ce95e4dc32af8876e874c48b15ca1903", size = 5001841 },
-    { url = "https://files.pythonhosted.org/packages/e3/9f/cca55edd85d03fc98c743bcc093965740a7440e909779c558039d6838f03/hf_xet-1.0.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:4d0bc7a3e6c1d21fcbb48e8726e3b19a2460e95971375e55e9a5f73ec7079a86", size = 4805318 },
-    { url = "https://files.pythonhosted.org/packages/d1/0b/28bda7ac9d699dcfb96f628aa135ddca3f0f77e9716351aab2b83966f957/hf_xet-1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23dee64f114ea9a272ff71a6a755e025d7a075a6a0dbf6da0990fe9211831ccf", size = 53504907 },
-    { url = "https://files.pythonhosted.org/packages/cb/04/ef1f7249a813841d193cbab2ef4d1d7d67c66c61d21d45223a72fdc5c88e/hf_xet-1.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d5f160550508ed87783d1eca4288602a713d1c45ec517d937acb9d93120f0cab", size = 52410434 },
-    { url = "https://files.pythonhosted.org/packages/81/b3/e7abec2619ecd9d1c743adfe79fa69cf84530f530969daf3dc804efef65b/hf_xet-1.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:5ebd79db87df0b9d3607e7c9a6bb0662c10e36992f522f66b1d2a7fe93f53f27", size = 53465113 },
-    { url = "https://files.pythonhosted.org/packages/df/82/b51f3b6e5c6f33e91220c37b17760229704c58e79ab0fcfd0fd3b55803d3/hf_xet-1.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8e6d2625971b4affad634835db82d5392f38de874205a9573e0dd3f0f9cb136f", size = 53461632 },
-    { url = "https://files.pythonhosted.org/packages/95/d2/32defba26d995f7acdc4fe3e5911473b25aff5b75c5a2532786435a709e8/hf_xet-1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:b446964bd75eb7f6b4d983c47241b2023eadfad1f56137ed00e1ca6fc278faec", size = 4121808 },
+    { url = "https://files.pythonhosted.org/packages/de/12/56e1abb9a44cdef59a411fe8a8673313195711b5ecce27880eb9c8fa90bd/hf_xet-1.1.9-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:a3b6215f88638dd7a6ff82cb4e738dcbf3d863bf667997c093a3c990337d1160", size = 2762553 },
+    { url = "https://files.pythonhosted.org/packages/3a/e6/2d0d16890c5f21b862f5df3146519c182e7f0ae49b4b4bf2bd8a40d0b05e/hf_xet-1.1.9-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9b486de7a64a66f9a172f4b3e0dfe79c9f0a93257c501296a2521a13495a698a", size = 2623216 },
+    { url = "https://files.pythonhosted.org/packages/81/42/7e6955cf0621e87491a1fb8cad755d5c2517803cea174229b0ec00ff0166/hf_xet-1.1.9-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c5a840c2c4e6ec875ed13703a60e3523bc7f48031dfd750923b2a4d1a5fc3c", size = 3186789 },
+    { url = "https://files.pythonhosted.org/packages/df/8b/759233bce05457f5f7ec062d63bbfd2d0c740b816279eaaa54be92aa452a/hf_xet-1.1.9-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:96a6139c9e44dad1c52c52520db0fffe948f6bce487cfb9d69c125f254bb3790", size = 3088747 },
+    { url = "https://files.pythonhosted.org/packages/6c/3c/28cc4db153a7601a996985bcb564f7b8f5b9e1a706c7537aad4b4809f358/hf_xet-1.1.9-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ad1022e9a998e784c97b2173965d07fe33ee26e4594770b7785a8cc8f922cd95", size = 3251429 },
+    { url = "https://files.pythonhosted.org/packages/84/17/7caf27a1d101bfcb05be85850d4aa0a265b2e1acc2d4d52a48026ef1d299/hf_xet-1.1.9-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:86754c2d6d5afb11b0a435e6e18911a4199262fe77553f8c50d75e21242193ea", size = 3354643 },
+    { url = "https://files.pythonhosted.org/packages/cd/50/0c39c9eed3411deadcc98749a6699d871b822473f55fe472fad7c01ec588/hf_xet-1.1.9-cp37-abi3-win_amd64.whl", hash = "sha256:5aad3933de6b725d61d51034e04174ed1dce7a57c63d530df0014dea15a40127", size = 2804797 },
 ]
 
 [[package]]
@@ -2708,7 +2708,7 @@ requires-dist = [
     { name = "opentelemetry-api", specifier = ">=1.27.0" },
     { name = "opentelemetry-exporter-otlp", specifier = ">=1.27.0" },
     { name = "opentelemetry-instrumentation-grpc", specifier = ">=0.50b0" },
-    { name = "outlines", marker = "extra == 'outlines'", specifier = ">=0.1.13" },
+    { name = "outlines", marker = "extra == 'outlines'", specifier = ">=0.1.13,<1.0" },
     { name = "peft", marker = "extra == 'peft'", specifier = ">=0.14.0" },
     { name = "pillow", specifier = ">=11.1.0" },
     { name = "prometheus-client", specifier = ">=0.21.0" },
@@ -2872,22 +2872,22 @@ dependencies = [
     { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b1f0cdd0720ad60536deb5baa427b782fd920dd4fcf72e244d32974caafa3b9e" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ac1849553ee673dfafb44c610c60cb60a2890f0e117f43599a526cf777eb8b8c" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:c52c4b869742f00b12cb34521d1381be6119fa46244791704b00cc4a3cb06850" },
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:47c895bcab508769d129d717a4b916b10225ae3855723aeec8dff8efe5346207" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c4bbc0b4be60319ba1cefc90be9557b317f0b3c261eeceb96ca6e0343eec56bf" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:bf88f647d76d79da9556ca55df49e45aff1d66c12797886364343179dd09a36c" },
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6bba7dca5d9a729f1e8e9befb98055498e551efaf5ed034824c168b560afc1ac" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7c0f08d1c44a02abad389373dddfce75904b969a410be2f4e5109483dd3dc0ce" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:1704e5dd66c9221e4e8b6ae2d80cbf54e129571e643f5fa9ca78cc6d2096403a" },
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:633f35e8b1b1f640ef5f8a98dbd84f19b548222ce7ba8f017fe47ce6badc106a" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d2f69f909da5dc52113ec66a851d62079f3d52c83184cf64beebdf12ca2f705c" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:58c749f52ddc9098155c77d6c74153bb13d8978fd6e1063b5d7b41d4644f5af5" },
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fa05ac6ebed4777de7a5eff398c1f17b697c02422516748ce66a8151873e5a0e" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:78e13c26c38ae92d6841cf9ce760d7e9d52bca3e3183de371812e84274b054dc" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:3559e98be824c2b12ab807319cd61c6174d73a524c9961317de8e8a44133c5c5" },
-    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-manylinux_2_28_aarch64.whl" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:2f155388b1200e08f3e901bb3487ff93ca6d63cde87c29b97bb6762a8f63b373" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:f446f97b20cb070747b103fb640df941b88cb68c8d3b01538287d05d56a7e874" },
     { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-win_amd64.whl", hash = "sha256:8614a167d6a163273fb130f586802f3243479862b53ee2843941c10cc5761da6" },
 ]