mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00

commit 00e0d2d7b4
Merge branch 'main' into remove_readme_fix_conflicts
.github/workflows/autodocs.yml (vendored, new file, 21 lines)
@@ -0,0 +1,21 @@
+name: Automatic Documentation for Launcher
+
+on:
+  pull_request:
+
+jobs:
+  update_docs:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      - name: Install Launcher
+        id: install-launcher
+        run: cargo install --git https://github.com/${{ github.repository }} --branch ${{ github.head_ref }} text-generation-launcher
+
+      - name: Check launcher Docs are up-to-date
+        run: |
+          echo text-generation-launcher --help
+          python update_doc.py --check
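The workflow's check can also be reproduced locally before pushing; a minimal sketch, assuming a checkout of the repository with the launcher crate in `launcher/` and `update_doc.py` at the repository root, as the workflow implies:

```shell
# Build the launcher from the local checkout instead of a branch ref,
# then verify the generated launcher docs match the current --help output.
cargo install --path launcher
python update_doc.py --check
```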
Cargo.lock (generated, 1255 lines changed): file diff suppressed because it is too large.
Cargo.toml

@@ -8,7 +8,7 @@ members = [
 ]
 
 [workspace.package]
-version = "1.0.3"
+version = "1.1.0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
Dockerfile (17 lines changed)

@@ -111,22 +111,22 @@ RUN make build-flash-attention-v2
 
 # Build Transformers exllama kernels
 FROM kernel-builder as exllama-kernels-builder
 
 WORKDIR /usr/src
 
 COPY server/exllama_kernels/ .
 
 
 # Build specific version of transformers
 RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
 
+# Build Transformers awq kernels
+FROM kernel-builder as awq-kernels-builder
+WORKDIR /usr/src
+COPY server/Makefile-awq Makefile
+# Build specific version of transformers
+RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
+
 # Build Transformers CUDA kernels
 FROM kernel-builder as custom-kernels-builder
 
 WORKDIR /usr/src
 
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
 RUN python setup.py build
 

@@ -158,6 +158,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
     libssl-dev \
     ca-certificates \
     make \
+    curl \
     && rm -rf /var/lib/apt/lists/*
 
 # Copy conda with PyTorch installed

@@ -175,6 +176,8 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 # Copy build artifacts from exllama kernels builder
 COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
+# Copy build artifacts from awq kernels builder
+COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy builds artifacts from vllm builder
 COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
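With the awq stage and its COPY line wired in above, rebuilding the serving image needs no new flags; a minimal sketch, assuming the default build arguments:

```shell
# Rebuild the image from the repository root; the awq kernels are built
# in their own stage and copied into the final conda site-packages.
docker build -t text-generation-inference:local .
```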
README.md (29 lines changed)

@@ -52,13 +52,15 @@ Text Generation Inference (TGI) is a toolkit for deploying and serving Large Lan
 
 ## Get Started
 
+### Docker
+
 For a detailed starting guide, please see the [Quick Tour](https://huggingface.co/docs/text-generation-inference/quicktour). The easiest way of getting started is using the official Docker container:
 
 ```shell
 model=tiiuae/falcon-7b-instruct
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.3 --model-id $model
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
 ```
 
 And then you can make requests like

@@ -79,8 +81,29 @@ text-generation-launcher --help
 
 ### API documentation
 
-You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route. The
-Swagger UI is also available at: [https://huggingface.github.io/text-generation-inference](https://huggingface.github.io/text-generation-inference).
+You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route.
+The Swagger UI is also available at: [https://huggingface.github.io/text-generation-inference](https://huggingface.github.io/text-generation-inference).
 
+### Using a private or gated model
+
+You have the option to utilize the `HUGGING_FACE_HUB_TOKEN` environment variable for configuring the token employed by
+`text-generation-inference`. This allows you to gain access to protected resources.
+
+For example, if you want to serve the gated Llama V2 model variants:
+
+1. Go to https://huggingface.co/settings/tokens
+2. Copy your cli READ token
+3. Export `HUGGING_FACE_HUB_TOKEN=<your cli READ token>`
+
+or with Docker:
+
+```shell
+model=meta-llama/Llama-2-7b-chat-hf
+volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
+token=<your cli READ token>
+
+docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
+```
+
 ### A note on Shared Memory (shm)
 
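For context, the "And then you can make requests like" line above refers to the REST call that follows it in the README; a representative sketch against the container started in the quick-start snippet (not text carried in this diff):

```shell
curl 127.0.0.1:8080/generate \
    -X POST \
    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":20}}' \
    -H 'Content-Type: application/json'
```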
benchmark/Cargo.toml

@@ -14,18 +14,19 @@ name = "text-generation-benchmark"
 path = "src/main.rs"
 
 [dependencies]
-average = "0.13"
+average = "0.14"
-clap = { version = "4.1.4", features = ["derive", "env"] }
+clap = { version = "4.4.5", features = ["derive", "env"] }
-crossterm = "0.26"
+crossterm = "0.27"
 float-ord = "0.3.2"
-serde = {version = "1.0.142", features = ["derive"]}
+serde = {version = "1.0.188", features = ["derive"]}
 serde_json = "1.0"
-tabled = "0.12.0"
+tabled = "0.14.0"
 text-generation-client = { path = "../router/client" }
-thiserror = "1.0.38"
+thiserror = "1.0.48"
-tokenizers = "0.13.3"
+tokenizers = { version = "0.14.0", features = ["http"] }
-tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
+tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "macros"] }
-tui = {package = "ratatui", version = "0.20", default-features = false, features = ["crossterm"]}
+tui = {package = "ratatui", version = "0.23", default-features = false, features = ["crossterm"]}
 tracing = "0.1.37"
-tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
+tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
+hf-hub = "0.3.1"
 
benchmark/src/app.rs

@@ -6,7 +6,7 @@ use tokio::sync::mpsc;
 use tui::backend::Backend;
 use tui::layout::{Alignment, Constraint, Direction, Layout};
 use tui::style::{Color, Modifier, Style};
-use tui::text::{Span, Spans};
+use tui::text::{Line, Span};
 use tui::widgets::{
     Axis, BarChart, Block, Borders, Chart, Dataset, Gauge, GraphType, Paragraph, Tabs,
 };

@@ -244,7 +244,7 @@ impl App {
     .batch_size
     .iter()
     .map(|b| {
-        Spans::from(vec![Span::styled(
+        Line::from(vec![Span::styled(
             format!("Batch: {b}"),
             Style::default().fg(Color::White),
         )])

@@ -468,7 +468,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
     // Latency p50/p90/p99 texts
     let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
     for (i, (name, value)) in latency_percentiles.iter().enumerate() {
-        let span = Spans::from(vec![Span::styled(
+        let span = Line::from(vec![Span::styled(
             format!("{name}: {value:.2} ms"),
             Style::default().fg(colors[i]),
         )]);

@@ -483,16 +483,16 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
 }
 
 /// Average/High/Low spans
-fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Spans<'a>> {
+fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Line<'a>> {
     vec![
-        Spans::from(vec![Span::styled(
+        Line::from(vec![Span::styled(
             format!(
                 "Average: {:.2} {unit}",
                 data.iter().sum::<f64>() / data.len() as f64
             ),
             Style::default().fg(Color::LightBlue),
         )]),
-        Spans::from(vec![Span::styled(
+        Line::from(vec![Span::styled(
             format!(
                 "Lowest: {:.2} {unit}",
                 data.iter()

@@ -501,7 +501,7 @@ fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Spans<'a>> {
             ),
             Style::default().fg(Color::Reset),
         )]),
-        Spans::from(vec![Span::styled(
+        Line::from(vec![Span::styled(
             format!(
                 "Highest: {:.2} {unit}",
                 data.iter()
benchmark/src/lib.rs

@@ -33,7 +33,7 @@ pub async fn run(
     watermark: bool,
     do_sample: bool,
     client: ShardedClient,
-) -> Result<(), crossterm::ErrorKind> {
+) -> Result<(), std::io::Error> {
     let parameters = NextTokenChooserParameters {
         temperature: temperature.unwrap_or(1.0),
         top_k: top_k.unwrap_or(0),
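Both Rust changes above are mechanical fallout of the dependency bumps in benchmark/Cargo.toml: ratatui renamed `Spans` to `Line` in releases after 0.20, and crossterm 0.27 dropped its `ErrorKind` alias in favor of plain `std::io::Error`. A minimal sketch of the post-upgrade shapes, assuming ratatui 0.23 imported under the `tui` alias as in this crate:

```rust
use tui::style::{Color, Style};
use tui::text::{Line, Span}; // formerly `Spans`; renamed upstream

// Builds one styled label line, mirroring the batch-size tabs above.
fn batch_label(b: u32) -> Line<'static> {
    Line::from(vec![Span::styled(
        format!("Batch: {b}"),
        Style::default().fg(Color::White),
    )])
}

// Terminal setup now reports plain std::io::Error, matching the new
// `run(...) -> Result<(), std::io::Error>` signature above.
fn setup_terminal() -> Result<(), std::io::Error> {
    crossterm::terminal::enable_raw_mode()
}
```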
@@ -140,6 +140,8 @@ class Parameters:
     watermark: bool
     # Get decoder input token logprobs and ids
     decoder_input_details: bool
+    # Return the N most likely tokens at each step
+    top_n_tokens: Optional[int]
 
 # Decoder input tokens
 class InputToken:

@@ -189,6 +191,8 @@ class BestOfSequence:
     prefill: List[InputToken]
     # Generated tokens
     tokens: List[Token]
+    # Most likely tokens
+    top_tokens: Optional[List[List[Token]]]
 
 
 # `generate` details

@@ -203,6 +207,8 @@ class Details:
     prefill: List[InputToken]
     # Generated tokens
     tokens: List[Token]
+    # Most likely tokens
+    top_tokens: Optional[List[List[Token]]]
     # Additional sequences when using the `best_of` parameter
     best_of_sequences: Optional[List[BestOfSequence]]
 

@@ -229,6 +235,8 @@ class StreamDetails:
 class StreamResponse:
     # Generated token
     token: Token
+    # Most likely tokens
+    top_tokens: Optional[List[Token]]
     # Complete generated text
     # Only available when the generation is finished
     generated_text: Optional[str]
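The new `top_n_tokens` parameter and the `top_tokens` fields it populates can be exercised from the Python client; a usage sketch, assuming a TGI server already running on localhost:

```python
from text_generation import Client

# Hypothetical local endpoint; adjust to wherever the server is listening.
client = Client("http://127.0.0.1:8080")

# Ask for the 3 most likely tokens at each generation step.
response = client.generate(
    "What is Deep Learning?",
    max_new_tokens=5,
    top_n_tokens=3,
)

# details.top_tokens holds one list of candidate Tokens per generated step.
for step, candidates in enumerate(response.details.top_tokens):
    print(step, [(t.text, t.logprob) for t in candidates])
```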
clients/python/poetry.lock (generated, 201 lines changed)

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"

@@ -124,6 +124,20 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "annotated-types"
+version = "0.5.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "annotated_types-0.5.0-py3-none-any.whl", hash = "sha256:58da39888f92c276ad970249761ebea80ba544b77acddaa1a4d6cf78287d45fd"},
+    {file = "annotated_types-0.5.0.tar.gz", hash = "sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"

@@ -693,55 +707,140 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "1.10.12"
+version = "2.4.2"
-description = "Data validation and settings management using python type hints"
+description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"},
+    {file = "pydantic-2.4.2-py3-none-any.whl", hash = "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1"},
-    {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"},
+    {file = "pydantic-2.4.2.tar.gz", hash = "sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7"},
-    {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"},
-    {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"},
-    {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"},
-    {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"},
-    {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"},
-    {file = "pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"},
-    {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"},
-    {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"},
-    {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"},
-    {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"},
-    {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"},
-    {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"},
-    {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"},
-    {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"},
-    {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"},
-    {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"},
-    {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"},
-    {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = "sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"},
-    {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"},
-    {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"},
-    {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"},
-    {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"},
-    {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"},
-    {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"},
-    {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"},
-    {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"},
-    {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"},
-    {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"},
-    {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"},
-    {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"},
-    {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"},
-    {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"},
-    {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"},
-    {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"},
 ]
 
 [package.dependencies]
-typing-extensions = ">=4.2.0"
+annotated-types = ">=0.4.0"
+pydantic-core = "2.10.1"
+typing-extensions = ">=4.6.1"
 
 [package.extras]
-dotenv = ["python-dotenv (>=0.10.4)"]
+email = ["email-validator (>=2.0.0)"]
-email = ["email-validator (>=1.0.3)"]
+
+[[package]]
+name = "pydantic-core"
+version = "2.10.1"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e"},
+    {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6"},
+    {file = "pydantic_core-2.10.1-cp310-none-win32.whl", hash = "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b"},
+    {file = "pydantic_core-2.10.1-cp310-none-win_amd64.whl", hash = "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607"},
+    {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f"},
+    {file = "pydantic_core-2.10.1-cp311-none-win32.whl", hash = "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6"},
+    {file = "pydantic_core-2.10.1-cp311-none-win_amd64.whl", hash = "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27"},
+    {file = "pydantic_core-2.10.1-cp311-none-win_arm64.whl", hash = "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f"},
+    {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c"},
+    {file = "pydantic_core-2.10.1-cp312-none-win32.whl", hash = "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f"},
+    {file = "pydantic_core-2.10.1-cp312-none-win_amd64.whl", hash = "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430"},
+    {file = "pydantic_core-2.10.1-cp312-none-win_arm64.whl", hash = "sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15"},
+    {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f"},
+    {file = "pydantic_core-2.10.1-cp37-none-win32.whl", hash = "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c"},
+    {file = "pydantic_core-2.10.1-cp37-none-win_amd64.whl", hash = "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561"},
+    {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de"},
+    {file = "pydantic_core-2.10.1-cp38-none-win32.whl", hash = "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee"},
+    {file = "pydantic_core-2.10.1-cp38-none-win_amd64.whl", hash = "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7"},
+    {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595"},
+    {file = "pydantic_core-2.10.1-cp39-none-win32.whl", hash = "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a"},
+    {file = "pydantic_core-2.10.1-cp39-none-win_amd64.whl", hash = "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357"},
+    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2"},
+    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132"},
+    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7"},
+    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776"},
+    {file = "pydantic_core-2.10.1.tar.gz", hash = "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
+
 [[package]]
 name = "pytest"

@@ -816,6 +915,7 @@ files = [
 {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
 {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
 {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
 {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
 {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
 {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},

@@ -823,8 +923,15 @@ files = [
 {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
 {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
 {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
 {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
 {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
 {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
 {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
 {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},

@@ -841,6 +948,7 @@ files = [
 {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
 {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
 {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
 {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
 {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
 {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},

@@ -848,6 +956,7 @@ files = [
 {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
 {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
 {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
 {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
 {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
 {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},

@@ -929,13 +1038,13 @@ files = [
 
 [[package]]
 name = "urllib3"
-version = "2.0.4"
+version = "2.0.5"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
+    {file = "urllib3-2.0.5-py3-none-any.whl", hash = "sha256:ef16afa8ba34a1f989db38e1dbbe0c302e4289a47856990d0682e374563ce35e"},
-    {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
+    {file = "urllib3-2.0.5.tar.gz", hash = "sha256:13abf37382ea2ce6fb744d4dad67838eec857c9f4f57009891805e0b5e123594"},
 ]
 
 [package.extras]

@@ -1050,4 +1159,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.7"
-content-hash = "0db2f97d52c557dd7f90c55b4ad5bbe308c957c5f7f99fec53c57e0a13822cb4"
+content-hash = "b7fab8703967f2616ea59a98a437cd30f97f0c8d2a06e399d688814a2a2c64f8"
clients/python/pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation"
-version = "0.6.0"
+version = "0.6.1"
 description = "Hugging Face Text Generation Python Client"
 license = "Apache-2.0"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
clients/python/text_generation/client.py

@@ -137,7 +137,7 @@ class Client:
     typical_p=typical_p,
     watermark=watermark,
     decoder_input_details=decoder_input_details,
-    top_n_tokens=top_n_tokens
+    top_n_tokens=top_n_tokens,
 )
 request = Request(inputs=prompt, stream=False, parameters=parameters)
 

@@ -482,7 +482,6 @@ class AsyncClient:
     headers=self.headers, cookies=self.cookies, timeout=self.timeout
 ) as session:
     async with session.post(self.base_url, json=request.dict()) as resp:
-
         if resp.status != 200:
             raise parse_error(resp.status, await resp.json())
 
@ -40,7 +40,7 @@ class Parameters(BaseModel):
|
|||||||
# Get decoder input token logprobs and ids
|
# Get decoder input token logprobs and ids
|
||||||
decoder_input_details: bool = False
|
decoder_input_details: bool = False
|
||||||
# Return the N most likely tokens at each step
|
# Return the N most likely tokens at each step
|
||||||
top_n_tokens: Optional[int]
|
top_n_tokens: Optional[int] = None
|
||||||
|
|
||||||
@validator("best_of")
|
@validator("best_of")
|
||||||
def valid_best_of(cls, field_value, values):
|
def valid_best_of(cls, field_value, values):
|
||||||
@ -133,7 +133,9 @@ class Request(BaseModel):
|
|||||||
and parameters.best_of > 1
|
and parameters.best_of > 1
|
||||||
and field_value
|
and field_value
|
||||||
):
|
):
|
||||||
raise ValidationError("`best_of` != 1 is not supported when `stream` == True")
|
raise ValidationError(
|
||||||
|
"`best_of` != 1 is not supported when `stream` == True"
|
||||||
|
)
|
||||||
return field_value
|
return field_value
|
||||||
|
|
||||||
|
|
||||||
@ -186,7 +188,7 @@ class BestOfSequence(BaseModel):
|
|||||||
# Generated tokens
|
# Generated tokens
|
||||||
tokens: List[Token]
|
tokens: List[Token]
|
||||||
# Most likely tokens
|
# Most likely tokens
|
||||||
top_tokens: Optional[List[List[Token]]]
|
top_tokens: Optional[List[List[Token]]] = None
|
||||||
|
|
||||||
|
|
||||||
# `generate` details
|
# `generate` details
|
||||||
@ -202,7 +204,7 @@ class Details(BaseModel):
|
|||||||
# Generated tokens
|
# Generated tokens
|
||||||
tokens: List[Token]
|
tokens: List[Token]
|
||||||
# Most likely tokens
|
# Most likely tokens
|
||||||
top_tokens: Optional[List[List[Token]]]
|
top_tokens: Optional[List[List[Token]]] = None
|
||||||
# Additional sequences when using the `best_of` parameter
|
# Additional sequences when using the `best_of` parameter
|
||||||
best_of_sequences: Optional[List[BestOfSequence]] = None
|
best_of_sequences: Optional[List[BestOfSequence]] = None
|
||||||
|
|
||||||
@ -230,7 +232,7 @@ class StreamResponse(BaseModel):
|
|||||||
# Generated token
|
# Generated token
|
||||||
token: Token
|
token: Token
|
||||||
# Most likely tokens
|
# Most likely tokens
|
||||||
top_tokens: Optional[List[Token]]
|
top_tokens: Optional[List[Token]] = None
|
||||||
# Complete generated text
|
# Complete generated text
|
||||||
# Only available when the generation is finished
|
# Only available when the generation is finished
|
||||||
generated_text: Optional[str] = None
|
generated_text: Optional[str] = None
|
||||||
|
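The pydantic changes above all follow one pattern: optional response fields gain an explicit `= None` default so that responses omitting them still validate. A small self-contained sketch of the idea (`StreamChunk` is a hypothetical stand-in, not the client's actual class):

```python
from typing import List, Optional

from pydantic import BaseModel


class Token(BaseModel):
    id: int
    text: str
    logprob: Optional[float] = None
    special: bool = False


class StreamChunk(BaseModel):
    token: Token
    # Explicit default: the field may simply be absent from the payload.
    top_tokens: Optional[List[Token]] = None


chunk = StreamChunk(token=Token(id=13, text="\n"))
assert chunk.top_tokens is None
```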
@ -10,7 +10,7 @@
        "name": "Apache 2.0",
        "url": "https://www.apache.org/licenses/LICENSE-2.0"
    },
    "version": "1.0.3"
    "version": "1.1.0"
},
"paths": {
    "/": {
@ -17,10 +17,22 @@
      title: Serving Private & Gated Models
    - local: basic_tutorials/using_cli
      title: Using TGI CLI
    - local: basic_tutorials/launcher
      title: All TGI CLI options
    - local: basic_tutorials/non_core_models
      title: Non-core Model Serving
  title: Tutorials
- sections:
    - local: conceptual/streaming
      title: Streaming
    - local: conceptual/quantization
      title: Quantization
    - local: conceptual/tensor_parallelism
      title: Tensor Parallelism
    - local: conceptual/paged_attention
      title: PagedAttention
    - local: conceptual/safetensors
      title: Safetensors
    - local: conceptual/flash_attention
      title: Flash Attention
  title: Conceptual Guides
@ -19,6 +19,6 @@ docker run --gpus all \
    --shm-size 1g \
    -e HUGGING_FACE_HUB_TOKEN=$token \
    -p 8080:80 \
    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.1 \
    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 \
    --model-id $model
```
247
docs/source/basic_tutorials/launcher.md
Normal file
@ -0,0 +1,247 @@
# Text-generation-launcher arguments

<!-- WRAP CODE BLOCKS -->

```
Text Generation Launcher

Usage: text-generation-launcher [OPTIONS]

Options:
      --model-id <MODEL_ID>
          The name of the model to load. Can be a MODEL_ID as listed on <https://hf.co/models> like `gpt2` or `OpenAssistant/oasst-sft-1-pythia-12b`. Or it can be a local directory containing the necessary files as saved by `save_pretrained(...)` methods of transformers

          [env: MODEL_ID=]
          [default: bigscience/bloom-560m]

      --revision <REVISION>
          The actual revision of the model if you're referring to a model on the hub. You can use a specific commit id or a branch like `refs/pr/2`

          [env: REVISION=]

      --validation-workers <VALIDATION_WORKERS>
          The number of tokenizer workers used for payload validation and truncation inside the router

          [env: VALIDATION_WORKERS=]
          [default: 2]

      --sharded <SHARDED>
          Whether to shard the model across multiple GPUs. By default text-generation-inference will use all available GPUs to run the model. Setting it to `false` deactivates `num_shard`

          [env: SHARDED=]
          [possible values: true, false]

      --num-shard <NUM_SHARD>
          The number of shards to use if you don't want to use all GPUs on a given machine. You can use `CUDA_VISIBLE_DEVICES=0,1 text-generation-launcher... --num_shard 2` and `CUDA_VISIBLE_DEVICES=2,3 text-generation-launcher... --num_shard 2` to launch 2 copies with 2 shards each on a given machine with 4 GPUs for instance

          [env: NUM_SHARD=]

      --quantize <QUANTIZE>
          Whether you want the model to be quantized

          [env: QUANTIZE=]

          Possible values:
          - awq:              4 bit quantization. Requires a specific AWQ quantized model: https://hf.co/models?search=awq. Should replace GPTQ models wherever possible because of the better latency
          - eetq:             8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from https://github.com/NetEase-FuXi/EETQ.git
          - gptq:             4 bit quantization. Requires a specific GPTQ quantized model: https://hf.co/models?search=gptq. text-generation-inference will use exllama (faster) kernels wherever possible, and use triton kernels (wider support) when it's not. AWQ has faster kernels
          - bitsandbytes:     Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half, but it is known that the model will be much slower to run than the native f16
          - bitsandbytes-nf4: Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x, but it is known that the model will be much slower to run than the native f16
          - bitsandbytes-fp4: Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better perplexity performance for your model

      --dtype <DTYPE>
          The dtype to be forced upon the model. This option cannot be used with `--quantize`

          [env: DTYPE=]
          [possible values: float16, bfloat16]

      --trust-remote-code
          Whether you want to execute hub modelling code. Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision

          [env: TRUST_REMOTE_CODE=]

      --max-concurrent-requests <MAX_CONCURRENT_REQUESTS>
          The maximum amount of concurrent requests for this particular deployment. Having a low limit will refuse clients requests instead of having them wait for too long and is usually good to handle backpressure correctly

          [env: MAX_CONCURRENT_REQUESTS=]
          [default: 128]

      --max-best-of <MAX_BEST_OF>
          This is the maximum allowed value for clients to set `best_of`. Best of makes `n` generations at the same time, and returns the best in terms of overall log probability over the entire generated sequence

          [env: MAX_BEST_OF=]
          [default: 2]

      --max-stop-sequences <MAX_STOP_SEQUENCES>
          This is the maximum allowed value for clients to set `stop_sequences`. Stop sequences are used to allow the model to stop on more than just the EOS token, and enable more complex "prompting" where users can preprompt the model in a specific way and define their "own" stop token aligned with their prompt

          [env: MAX_STOP_SEQUENCES=]
          [default: 4]

      --max-top-n-tokens <MAX_TOP_N_TOKENS>
          This is the maximum allowed value for clients to set `top_n_tokens`. `top_n_tokens` is used to return information about the `n` most likely tokens at each generation step, instead of just the sampled token. This information can be used for downstream tasks like classification or ranking

          [env: MAX_TOP_N_TOKENS=]
          [default: 5]

      --max-input-length <MAX_INPUT_LENGTH>
          This is the maximum allowed input length (expressed in number of tokens) for users. The larger this value, the longer prompt users can send, which can impact the overall memory required to handle the load. Please note that some models have a finite range of sequence they can handle

          [env: MAX_INPUT_LENGTH=]
          [default: 1024]

      --max-total-tokens <MAX_TOTAL_TOKENS>
          This is the most important value to set as it defines the "memory budget" of running clients requests. Clients will send input sequences and ask to generate `max_new_tokens` on top. With a value of `1512` users can send either a prompt of `1000` and ask for `512` new tokens, or send a prompt of `1` and ask for `1511` max_new_tokens. The larger this value, the larger amount each request will be in your RAM and the less effective batching can be

          [env: MAX_TOTAL_TOKENS=]
          [default: 2048]

      --waiting-served-ratio <WAITING_SERVED_RATIO>
          This represents the ratio of waiting queries vs running queries where you want to start considering pausing the running queries to include the waiting ones into the same batch. `waiting_served_ratio=1.2` means when 12 queries are waiting and there are only 10 queries left in the current batch we check if we can fit those 12 waiting queries into the batching strategy, and if yes, then batching happens delaying the 10 running queries by a `prefill` run.

          This setting is only applied if there is room in the batch as defined by `max_batch_total_tokens`.

          [env: WAITING_SERVED_RATIO=]
          [default: 1.2]

      --max-batch-prefill-tokens <MAX_BATCH_PREFILL_TOKENS>
          Limits the number of tokens for the prefill operation. Since this operation takes the most memory and is compute bound, it is interesting to limit the number of requests that can be sent

          [env: MAX_BATCH_PREFILL_TOKENS=]
          [default: 4096]

      --max-batch-total-tokens <MAX_BATCH_TOTAL_TOKENS>
          **IMPORTANT** This is one critical control to allow maximum usage of the available hardware.

          This represents the total amount of potential tokens within a batch. When using padding (not recommended) this would be equivalent of `batch_size` * `max_total_tokens`.

          However in the non-padded (flash attention) version this can be much finer.

          For `max_batch_total_tokens=1000`, you could fit `10` queries of `total_tokens=100` or a single query of `1000` tokens.

          Overall this number should be the largest possible amount that fits the remaining memory (after the model is loaded). Since the actual memory overhead depends on other parameters like if you're using quantization, flash attention or the model implementation, text-generation-inference cannot infer this number automatically.

          [env: MAX_BATCH_TOTAL_TOKENS=]

      --max-waiting-tokens <MAX_WAITING_TOKENS>
          This setting defines how many tokens can be passed before forcing the waiting queries to be put on the batch (if the size of the batch allows for it). New queries require 1 `prefill` forward, which is different from `decode` and therefore you need to pause the running batch in order to run `prefill` to create the correct values for the waiting queries to be able to join the batch.

          With a value too small, queries will always "steal" the compute to run `prefill` and running queries will be delayed by a lot.

          With a value too big, waiting queries could wait for a very long time before being allowed a slot in the running batch. If your server is busy that means that requests that could run in ~2s on an empty server could end up running in ~20s because the query had to wait for 18s.

          This number is expressed in number of tokens to make it a bit more "model" agnostic, but what should really matter is the overall latency for end users.

          [env: MAX_WAITING_TOKENS=]
          [default: 20]

      --hostname <HOSTNAME>
          The IP address to listen on

          [env: HOSTNAME=]
          [default: 0.0.0.0]

      -p, --port <PORT>
          The port to listen on

          [env: PORT=]
          [default: 3000]

      --shard-uds-path <SHARD_UDS_PATH>
          The name of the socket for gRPC communication between the webserver and the shards

          [env: SHARD_UDS_PATH=]
          [default: /tmp/text-generation-server]

      --master-addr <MASTER_ADDR>
          The address the master shard will listen on. (setting used by torch distributed)

          [env: MASTER_ADDR=]
          [default: localhost]

      --master-port <MASTER_PORT>
          The address the master port will listen on. (setting used by torch distributed)

          [env: MASTER_PORT=]
          [default: 29500]

      --huggingface-hub-cache <HUGGINGFACE_HUB_CACHE>
          The location of the huggingface hub cache. Used to override the location if you want to provide a mounted disk for instance

          [env: HUGGINGFACE_HUB_CACHE=]

      --weights-cache-override <WEIGHTS_CACHE_OVERRIDE>
          The location of the huggingface hub cache. Used to override the location if you want to provide a mounted disk for instance

          [env: WEIGHTS_CACHE_OVERRIDE=]

      --disable-custom-kernels
          For some models (like bloom), text-generation-inference implemented custom cuda kernels to speed up inference. Those kernels were only tested on A100. Use this flag to disable them if you're running on different hardware and encounter issues

          [env: DISABLE_CUSTOM_KERNELS=]

      --cuda-memory-fraction <CUDA_MEMORY_FRACTION>
          Limit the CUDA available memory. The allowed value equals the total visible memory multiplied by cuda-memory-fraction

          [env: CUDA_MEMORY_FRACTION=]
          [default: 1.0]

      --rope-scaling <ROPE_SCALING>
          Rope scaling will only be used for RoPE models and allows rescaling the position rotary to accommodate larger prompts.

          Goes together with `rope_factor`.

          `--rope-factor 2.0` gives linear scaling with a factor of 2.0 `--rope-scaling dynamic` gives dynamic scaling with a factor of 1.0 `--rope-scaling linear` gives linear scaling with a factor of 1.0 (Nothing will be changed basically)

          `--rope-scaling linear --rope-factor` fully describes the scaling you want

          [env: ROPE_SCALING=]
          [possible values: linear, dynamic]

      --rope-factor <ROPE_FACTOR>
          Rope scaling will only be used for RoPE models. See `rope_scaling`

          [env: ROPE_FACTOR=]

      --json-output
          Outputs the logs in JSON format (useful for telemetry)

          [env: JSON_OUTPUT=]

      --otlp-endpoint <OTLP_ENDPOINT>
          [env: OTLP_ENDPOINT=]

      --cors-allow-origin <CORS_ALLOW_ORIGIN>
          [env: CORS_ALLOW_ORIGIN=]

      --watermark-gamma <WATERMARK_GAMMA>
          [env: WATERMARK_GAMMA=]

      --watermark-delta <WATERMARK_DELTA>
          [env: WATERMARK_DELTA=]

      --ngrok
          Enable ngrok tunneling

          [env: NGROK=]

      --ngrok-authtoken <NGROK_AUTHTOKEN>
          ngrok authentication token

          [env: NGROK_AUTHTOKEN=]

      --ngrok-edge <NGROK_EDGE>
          ngrok edge

          [env: NGROK_EDGE=]

      -e, --env
          Display a lot of information about your runtime environment

      -h, --help
          Print help (see a summary with '-h')

      -V, --version
          Print version

```
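Several of the flags above interact through simple token arithmetic. A back-of-the-envelope sketch (plain Python, not TGI's actual scheduler) of the "memory budget" described for `--max-total-tokens` and `--max-batch-total-tokens`: each request costs roughly `input_length + max_new_tokens` token slots, and a batch fits if the sum stays under the budget:

```python
# Hypothetical helper illustrating the token budget; numbers mirror the doc's example.
def batch_fits(requests, max_batch_total_tokens=1000):
    """requests: list of (input_length, max_new_tokens) pairs."""
    total = sum(input_len + max_new for input_len, max_new in requests)
    return total <= max_batch_total_tokens

assert batch_fits([(50, 50)] * 10)                 # 10 queries of total_tokens=100
assert batch_fits([(500, 500)])                    # or a single query of 1000 tokens
assert not batch_fits([(50, 50)] * 10 + [(1, 1)])  # two tokens over budget
```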
24
docs/source/basic_tutorials/non_core_models.md
Normal file
@ -0,0 +1,24 @@
# Non-core Model Serving

TGI supports various LLM architectures (see the full list [here](../supported_models)). If you wish to serve a model that is not one of the supported models, TGI will fall back to the `transformers` implementation of that model. This means you will be unable to use some of the features introduced by TGI, such as tensor-parallel sharding or flash attention. However, you can still get many benefits of TGI, such as continuous batching or streaming outputs.

You can serve these models using the same Docker command-line invocation as with fully supported models 👇

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id gpt2
```

If the model you wish to serve is a custom transformers model, and its weights and implementation are available in the Hub, you can still serve the model by passing the `--trust-remote-code` flag to the `docker run` command like below 👇

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id <CUSTOM_MODEL_ID> --trust-remote-code
```

Finally, if the model is not on the Hugging Face Hub but stored locally, you can pass the path to the folder that contains your model like below 👇

```bash
# Make sure your model is in the $volume directory
docker run --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id /data/<PATH-TO-FOLDER>
```

You can refer to the [transformers docs on custom models](https://huggingface.co/docs/transformers/main/en/custom_models) for more information.
@ -4,7 +4,7 @@ Text Generation Inference improves the model in several aspects.

## Quantization

TGI supports [bits-and-bytes](https://github.com/TimDettmers/bitsandbytes#bitsandbytes) and [GPT-Q](https://arxiv.org/abs/2210.17323) quantization. To speed up inference with quantization, simply set `quantize` flag to `bitsandbytes` or `gptq` depending on the quantization technique you wish to use. When using GPT-Q quantization, you need to point to one of the models [here](https://huggingface.co/models?search=gptq).
TGI supports [bits-and-bytes](https://github.com/TimDettmers/bitsandbytes#bitsandbytes), [GPT-Q](https://arxiv.org/abs/2210.17323) and [AWQ](https://arxiv.org/abs/2306.00978) quantization. To speed up inference with quantization, simply set the `quantize` flag to `bitsandbytes`, `gptq` or `awq` depending on the quantization technique you wish to use. When using GPT-Q quantization, you need to point to one of [these models](https://huggingface.co/models?search=gptq); when using AWQ quantization, you need to point to one of [these models](https://huggingface.co/models?search=awq). For more information about quantization, refer to the [quantization guide](./conceptual/quantization.md).


## RoPE Scaling
9
docs/source/conceptual/paged_attention.md
Normal file
@ -0,0 +1,9 @@
# PagedAttention

LLMs struggle with memory limitations during generation. In the decoding part of generation, all the attention keys and values generated for previous tokens are stored in GPU memory for reuse. This is called the _KV cache_, and it may take up a large amount of memory for large models and long sequences.

PagedAttention attempts to optimize memory use by partitioning the KV cache into blocks that are accessed through a lookup table. Thus, the KV cache does not need to be stored in contiguous memory, and blocks are allocated as needed. The memory efficiency can increase GPU utilization on memory-bound workloads, so more inference batches can be supported.

The use of a lookup table to access the memory blocks can also help with KV sharing across multiple generations. This is helpful for techniques such as _parallel sampling_, where multiple outputs are generated simultaneously for the same prompt. In this case, the cached KV blocks can be shared among the generations.

TGI's PagedAttention implementation leverages the custom CUDA kernels developed by the [vLLM Project](https://github.com/vllm-project/vllm). You can learn more about this technique on the [project's page](https://vllm.ai/).
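To make the block-table idea concrete, here is a toy sketch in plain Python (illustrative only, not vLLM's actual kernels or data layout): logical KV positions are mapped onto non-contiguous physical blocks, and attention reads them back through the table.

```python
BLOCK_SIZE = 4  # toy value; real implementations use e.g. 16 tokens per block

class PagedKVCache:
    def __init__(self):
        self.blocks = []        # physical blocks, each holding up to BLOCK_SIZE entries
        self.block_table = {}   # sequence id -> list of physical block indices

    def append(self, seq_id, kv):
        table = self.block_table.setdefault(seq_id, [])
        if not table or len(self.blocks[table[-1]]) == BLOCK_SIZE:
            self.blocks.append([])              # allocate a block only when needed
            table.append(len(self.blocks) - 1)
        self.blocks[table[-1]].append(kv)

    def gather(self, seq_id):
        # attention reads KV through the lookup table, not contiguous memory
        return [kv for b in self.block_table[seq_id] for kv in self.blocks[b]]

cache = PagedKVCache()
for t in range(6):
    cache.append("seq-0", f"kv{t}")
assert cache.gather("seq-0") == [f"kv{t}" for t in range(6)]
```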
59
docs/source/conceptual/quantization.md
Normal file
@ -0,0 +1,59 @@
# Quantization

TGI offers GPTQ and bits-and-bytes quantization to quantize large language models.

## Quantization with GPTQ

GPTQ is a post-training quantization method to make the model smaller. It quantizes each layer by finding a compressed version of its weight that yields a minimum mean squared error, like below 👇

Given a layer \\(l\\) with weight matrix \\(W_{l}\\) and layer input \\(X_{l}\\), find the quantized weight \\(\\hat{W}_{l}\\):

$$\hat{W}_{l}^{*} = \operatorname{argmin}_{\hat{W}_{l}} \lVert W_{l}X - \hat{W}_{l}X \rVert_{2}^{2}$$

TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using the quantization script. You can run a quantized model by simply passing the `--quantize` flag like below 👇

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize gptq
```

Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI.

To quantize a given model using GPTQ with a calibration dataset, simply run

```bash
text-generation-server quantize tiiuae/falcon-40b /data/falcon-40b-gptq
# Add --upload-to-model-id MYUSERNAME/falcon-40b to push the created model to the hub directly
```

This will create a new directory with the quantized files, which you can then serve with:

```bash
text-generation-launcher --model-id /data/falcon-40b-gptq/ --sharded true --num-shard 2 --quantize gptq
```

You can learn more about the quantization options by running `text-generation-server quantize --help`.

If you wish to do more with GPTQ models (e.g. train an adapter on top), you can read about the transformers GPTQ integration [here](https://huggingface.co/blog/gptq-integration).
You can learn more about GPTQ from the [paper](https://arxiv.org/pdf/2210.17323.pdf).

## Quantization with bitsandbytes

bitsandbytes is a library used to apply 8-bit and 4-bit quantization to models. Unlike GPTQ quantization, bitsandbytes doesn't require a calibration dataset or any post-processing – weights are automatically quantized on load. However, inference with bitsandbytes is slower than GPTQ or FP16 precision.

8-bit quantization enables multi-billion parameter scale models to fit in smaller hardware without degrading performance too much.
In TGI, you can use 8-bit quantization by adding `--quantize bitsandbytes` like below 👇

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize bitsandbytes
```

4-bit quantization is also possible with bitsandbytes. You can choose one of the following 4-bit data types: 4-bit float (`fp4`), or 4-bit `NormalFloat` (`nf4`). These data types were introduced in the context of parameter-efficient fine-tuning, but you can apply them for inference by automatically converting the model weights on load.

In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize bitsandbytes-nf4
```

You can get more information about 8-bit quantization by reading this [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), and 4-bit quantization by reading [this blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes).
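To make the objective above concrete, here is a toy round-to-nearest quantizer in plain Python/numpy. This is illustrative only: GPTQ itself minimizes the layer output error with a calibration set rather than rounding naively, but the error term computed at the end is exactly the quantity the formula above minimizes.

```python
import numpy as np

def quantize_rtn(w, bits=4):
    """Naive symmetric round-to-nearest quantization of a weight matrix."""
    qmax = 2 ** (bits - 1) - 1
    scale = np.abs(w).max() / qmax
    q = np.clip(np.round(w / scale), -qmax - 1, qmax)
    return q.astype(np.int8), scale

W = np.random.randn(16, 16).astype(np.float32)   # layer weight
X = np.random.randn(16, 8).astype(np.float32)    # layer input

q, scale = quantize_rtn(W)
W_hat = q.astype(np.float32) * scale

# The quantity GPTQ minimizes: squared error of the layer *output*, not the weights.
output_error = np.linalg.norm(W @ X - W_hat @ X) ** 2
print(f"layer output error: {output_error:.4f}")
```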
7
docs/source/conceptual/safetensors.md
Normal file
@ -0,0 +1,7 @@
# Safetensors

Safetensors is a model serialization format for deep learning models. It is [faster](https://huggingface.co/docs/safetensors/speed) and safer compared to other serialization formats like pickle (which is used under the hood in many deep learning libraries).

TGI depends on the safetensors format mainly to enable [tensor parallelism sharding](./tensor_parallelism). For a given model repository during serving, TGI looks for safetensors weights. If there are no safetensors weights, TGI converts the PyTorch weights to safetensors format.

You can learn more about safetensors by reading the [safetensors documentation](https://huggingface.co/docs/safetensors/index).
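A short sketch of the format in use, via the `safetensors` PyTorch helpers (the tensor names and file name are illustrative):

```python
import torch
from safetensors.torch import save_file, load_file

tensors = {
    "embedding.weight": torch.zeros((10, 4)),
    "lm_head.weight": torch.ones((4, 10)),
}
save_file(tensors, "model.safetensors")

# Loading never unpickles arbitrary objects, so no code execution on load.
reloaded = load_file("model.safetensors")
assert torch.equal(reloaded["embedding.weight"], tensors["embedding.weight"])
```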
14
docs/source/conceptual/tensor_parallelism.md
Normal file
@ -0,0 +1,14 @@
# Tensor Parallelism

Tensor parallelism is a technique used to fit a large model on multiple GPUs. For example, when multiplying the input tensors with the first weight tensor, the matrix multiplication is equivalent to splitting the weight tensor column-wise, multiplying each column with the input separately, and then concatenating the separate outputs. These outputs are then transferred from the GPUs and concatenated together to get the final result, like below 👇



<Tip warning={true}>

Tensor Parallelism only works for [models officially supported](../supported_models); it will not work when falling back to `transformers`. You can get more information about unsupported models [here](../basic_tutorials/non_core_models).

</Tip>

You can learn a lot more details about tensor parallelism from [the `transformers` docs](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#tensor-parallelism).
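The column-split equivalence described above is easy to verify numerically. A minimal numpy sketch (single process; real tensor parallelism shards the two halves across devices and gathers the partial outputs):

```python
import numpy as np

x = np.random.randn(2, 8)    # input activations
W = np.random.randn(8, 6)    # full weight matrix

# Split W column-wise across two "GPUs".
W0, W1 = np.split(W, 2, axis=1)
partial0 = x @ W0            # computed on device 0
partial1 = x @ W1            # computed on device 1

# Concatenating the partial outputs recovers the full matmul.
assert np.allclose(np.concatenate([partial0, partial1], axis=1), x @ W)
```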
@ -8,7 +8,7 @@ Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/
model=tiiuae/falcon-7b-instruct
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run

docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.0.3 --model-id $model
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.1.0 --model-id $model
```

<Tip warning={true}>
@ -85,7 +85,7 @@ curl 127.0.0.1:8080/generate \
To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.

```bash
docker run ghcr.io/huggingface/text-generation-inference:1.0.3 --help
docker run ghcr.io/huggingface/text-generation-inference:1.1.0 --help
```

</Tip>
@ -18,7 +18,8 @@ The following models are optimized and can be served with TGI, which uses custom
- [Falcon 40B](https://huggingface.co/tiiuae/falcon-40b)
- [MPT](https://huggingface.co/mosaicml/mpt-30b)
- [Llama V2](https://huggingface.co/meta-llama)
- [Codellama](https://huggingface.co/codellama)
- [Code Llama](https://huggingface.co/codellama)
- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)

If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:

@ -29,6 +30,12 @@ AutoModelForCausalLM.from_pretrained(<model>, device_map="auto")`
AutoModelForSeq2SeqLM.from_pretrained(<model>, device_map="auto")
```

If you wish to serve a supported model that already exists on a local folder, just point to the local folder.

```bash
text-generation-launcher --model-id <PATH-TO-LOCAL-BLOOM>
```


## Supported Hardware
@ -0,0 +1,104 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 1724, "logprob": -7.703125, "text": "What" },
      { "id": 338, "logprob": -1.4765625, "text": "is" },
      { "id": 21784, "logprob": -9.390625, "text": "Deep" },
      { "id": 29257, "logprob": -1.8583984, "text": "Learning" },
      { "id": 29973, "logprob": -0.7548828, "text": "?" }
    ],
    "seed": null,
    "tokens": [
      { "id": 13, "logprob": -1.9306641, "special": false, "text": "\n" },
      { "id": 5618, "logprob": -2.4550781, "special": false, "text": "What" },
      { "id": 338, "logprob": -0.5732422, "special": false, "text": " is" },
      { "id": 278, "logprob": -1.5761719, "special": false, "text": " the" },
      { "id": 4328, "logprob": -1.5888672, "special": false, "text": " difference" },
      { "id": 1546, "logprob": -0.026504517, "special": false, "text": " between" },
      { "id": 21784, "logprob": -1.4287109, "special": false, "text": " Deep" },
      { "id": 29257, "logprob": -0.15856934, "special": false, "text": " Learning" },
      { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
      { "id": 6189, "logprob": -0.62646484, "special": false, "text": " Machine" }
    ],
    "top_tokens": null
  },
  "generated_text": "\nWhat is the difference between Deep Learning and Machine"
}
@ -0,0 +1,99 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 338, "logprob": -9.0859375, "text": "is" },
      { "id": 21784, "logprob": -10.90625, "text": "Deep" },
      { "id": 29257, "logprob": -2.65625, "text": "Learning" },
      { "id": 29973, "logprob": -4.8085938, "text": "?" }
    ],
    "seed": 0,
    "tokens": [
      { "id": 13, "logprob": -0.19958496, "special": false, "text": "\n" },
      { "id": 4013, "logprob": -2.203125, "special": false, "text": "This" },
      { "id": 1139, "logprob": -0.23693848, "special": false, "text": " question" },
      { "id": 756, "logprob": 0.0, "special": false, "text": " has" },
      { "id": 1063, "logprob": -0.076538086, "special": false, "text": " been" },
      { "id": 4433, "logprob": 0.0, "special": false, "text": " asked" },
      { "id": 1784, "logprob": -1.1367188, "special": false, "text": " many" },
      { "id": 3064, "logprob": 0.0, "special": false, "text": " times" },
      { "id": 322, "logprob": -1.7460938, "special": false, "text": " and" },
      { "id": 306, "logprob": 0.0, "special": false, "text": " I" }
    ],
    "top_tokens": null
  },
  "generated_text": "What is Deep Learning?\nThis question has been asked many times and I"
}
@ -0,0 +1,418 @@
[
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.703125, "text": "What" },
        { "id": 338, "logprob": -1.4765625, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8652344, "text": "Learning" },
        { "id": 29973, "logprob": -0.7548828, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9306641, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4550781, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.5732422, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5761719, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5888672, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.026504517, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4287109, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15856934, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62646484, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.703125, "text": "What" },
        { "id": 338, "logprob": -1.4765625, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8583984, "text": "Learning" },
        { "id": 29973, "logprob": -0.7548828, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9306641, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4550781, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.5732422, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5761719, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5888672, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.026504517, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4287109, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15856934, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62646484, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.703125, "text": "What" },
        { "id": 338, "logprob": -1.4765625, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8652344, "text": "Learning" },
        { "id": 29973, "logprob": -0.7548828, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9306641, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4550781, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.5732422, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5761719, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5888672, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.026504517, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4287109, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15856934, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62646484, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.703125, "text": "What" },
        { "id": 338, "logprob": -1.4765625, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8652344, "text": "Learning" },
        { "id": 29973, "logprob": -0.7548828, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9306641, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4550781, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.5732422, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5761719, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5888672, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.026504517, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4287109, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15856934, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62646484, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  }
]
@ -0,0 +1,418 @@
[
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.6914062, "text": "What" },
        { "id": 338, "logprob": -1.4746094, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8623047, "text": "Learning" },
        { "id": 29973, "logprob": -0.7558594, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9228516, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4609375, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.57177734, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5722656, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5859375, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.02633667, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4335938, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15991211, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62060547, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.6914062, "text": "What" },
        { "id": 338, "logprob": -1.4746094, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8623047, "text": "Learning" },
        { "id": 29973, "logprob": -0.7558594, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9228516, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4609375, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.57177734, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5722656, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5859375, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.02633667, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4335938, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15991211, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62060547, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.6914062, "text": "What" },
        { "id": 338, "logprob": -1.4746094, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8623047, "text": "Learning" },
        { "id": 29973, "logprob": -0.7558594, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9228516, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4609375, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.57177734, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5722656, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5859375, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.02633667, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4335938, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15991211, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62060547, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 1724, "logprob": -7.6914062, "text": "What" },
        { "id": 338, "logprob": -1.4746094, "text": "is" },
        { "id": 21784, "logprob": -9.390625, "text": "Deep" },
        { "id": 29257, "logprob": -1.8623047, "text": "Learning" },
        { "id": 29973, "logprob": -0.7558594, "text": "?" }
      ],
      "seed": null,
      "tokens": [
        { "id": 13, "logprob": -1.9228516, "special": false, "text": "\n" },
        { "id": 5618, "logprob": -2.4609375, "special": false, "text": "What" },
        { "id": 338, "logprob": -0.57177734, "special": false, "text": " is" },
        { "id": 278, "logprob": -1.5722656, "special": false, "text": " the" },
        { "id": 4328, "logprob": -1.5859375, "special": false, "text": " difference" },
        { "id": 1546, "logprob": -0.02633667, "special": false, "text": " between" },
        { "id": 21784, "logprob": -1.4335938, "special": false, "text": " Deep" },
        { "id": 29257, "logprob": -0.15991211, "special": false, "text": " Learning" },
        { "id": 322, "logprob": -0.17456055, "special": false, "text": " and" },
        { "id": 6189, "logprob": -0.62060547, "special": false, "text": " Machine" }
      ],
      "top_tokens": null
    },
    "generated_text": "\nWhat is the difference between Deep Learning and Machine"
  }
]
@ -0,0 +1,104 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 1724, "logprob": -7.6914062, "text": "What" },
      { "id": 338, "logprob": -1.4746094, "text": "is" },
      { "id": 21784, "logprob": -9.390625, "text": "Deep" },
      { "id": 29257, "logprob": -1.8623047, "text": "Learning" },
      { "id": 29973, "logprob": -0.7558594, "text": "?" }
    ],
    "seed": null,
    "tokens": [
      { "id": 13, "logprob": -1.9228516, "special": false, "text": "\n" },
      { "id": 5618, "logprob": -2.4609375, "special": false, "text": "What" },
      { "id": 338, "logprob": -0.57177734, "special": false, "text": " is" },
      { "id": 278, "logprob": -1.5722656, "special": false, "text": " the" },
      { "id": 4328, "logprob": -1.5927734, "special": false, "text": " difference" },
      { "id": 1546, "logprob": -0.026428223, "special": false, "text": " between" },
      { "id": 21784, "logprob": -1.4267578, "special": false, "text": " Deep" },
      { "id": 29257, "logprob": -0.16015625, "special": false, "text": " Learning" },
      { "id": 322, "logprob": -0.17382812, "special": false, "text": " and" },
      { "id": 6189, "logprob": -0.62060547, "special": false, "text": " Machine" }
    ],
    "top_tokens": null
  },
  "generated_text": "\nWhat is the difference between Deep Learning and Machine"
}
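These snapshots record the full generate response: prefill token logprobs, the generated tokens, and the decoded text. A minimal sketch of the kind of request that produces this shape, using the text-generation Python client against a locally running server (the URL and port are assumptions for illustration, not part of this commit):

    # Minimal sketch, assuming a TGI server is already serving the model
    # locally on port 8080 (hypothetical endpoint).
    from text_generation import Client

    client = Client("http://127.0.0.1:8080")
    response = client.generate(
        "What is Deep Learning?",
        max_new_tokens=10,
        decoder_input_details=True,  # populates the "prefill" logprobs above
    )

    # The fields mirror the snapshot JSON: details.prefill, details.tokens,
    # and the concatenated generated_text.
    print(response.details.finish_reason, response.details.generated_tokens)
    print(response.generated_text)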
@ -16,7 +16,7 @@
     },
     {
       "id": 2009,
-      "logprob": -11.5546875,
+      "logprob": -11.546875,
       "text": "request"
     }
   ],
@ -24,65 +24,66 @@
     "tokens": [
-      { "id": 363, "logprob": -1.5380859, "special": false, "text": " for" },
+      { "id": 363, "logprob": -1.5351562, "special": false, "text": " for" },
-      { "id": 847, "logprob": -2.5917969, "special": false, "text": " /" },
+      { "id": 847, "logprob": -2.5722656, "special": false, "text": " /" },
-      { "id": 2754, "logprob": -2.2773438, "special": false, "text": "api" },
+      { "id": 2754, "logprob": -2.2714844, "special": false, "text": "api" },
-      { "id": 29914, "logprob": -0.034362793, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.03414917, "special": false, "text": "/" },
-      { "id": 29894, "logprob": -0.96533203, "special": false, "text": "v" },
+      { "id": 29894, "logprob": -0.95996094, "special": false, "text": "v" },
-      { "id": 29896, "logprob": -0.36669922, "special": false, "text": "1" },
+      { "id": 29896, "logprob": -0.3635254, "special": false, "text": "1" },
-      { "id": 29914, "logprob": -0.013122559, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.013031006, "special": false, "text": "/" },
-      { "id": 16418, "logprob": -3.1503906, "special": false, "text": "projects" },
+      { "id": 16418, "logprob": -3.1523438, "special": false, "text": "projects" },
-      { "id": 29914, "logprob": -0.43652344, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.43701172, "special": false, "text": "/" },
-      { "id": 29896, "logprob": -1.9404297, "special": false, "text": "1" }
+      { "id": 29896, "logprob": -1.9394531, "special": false, "text": "1" }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "for /api/v1/projects/1"
+  "generated_text": " for /api/v1/projects/1"
 }
@ -16,7 +16,7 @@
     },
     {
       "id": 2009,
-      "logprob": -11.5546875,
+      "logprob": -11.546875,
       "text": "request"
     }
   ],
@ -24,19 +24,19 @@
     "tokens": [
-      { "id": 5229, "logprob": -2.5683594, "special": false, "text": " failed" },
+      { "id": 5229, "logprob": -2.5839844, "special": false, "text": " failed" },
-      { "id": 29901, "logprob": -0.45336914, "special": false, "text": ":" },
+      { "id": 29901, "logprob": -0.44970703, "special": false, "text": ":" },
-      { "id": 4829, "logprob": -1.8408203, "special": false, "text": " Error" },
+      { "id": 4829, "logprob": -1.8339844, "special": false, "text": " Error" },
@ -52,7 +52,8 @@
       "special": false,
       "text": " test"
     }
-  ]
+  ],
+  "top_tokens": null
   },
-  "generated_text": "Test requestfailed: Error in test"
+  "generated_text": "Test request failed: Error in test"
 }
@ -17,7 +17,7 @@
     },
     {
       "id": 2009,
-      "logprob": -11.5546875,
+      "logprob": -11.546875,
       "text": "request"
     }
   ],
@ -25,25 +25,25 @@
     "tokens": [
-      { "id": 363, "logprob": -1.5380859, "special": false, "text": " for" },
+      { "id": 363, "logprob": -1.5351562, "special": false, "text": " for" },
-      { "id": 847, "logprob": -2.5859375, "special": false, "text": " /" },
+      { "id": 847, "logprob": -2.5566406, "special": false, "text": " /" },
-      { "id": 2754, "logprob": -2.2695312, "special": false, "text": "api" },
+      { "id": 2754, "logprob": -2.2519531, "special": false, "text": "api" },
-      { "id": 29914, "logprob": -0.03439331, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.03414917, "special": false, "text": "/" },
@ -55,13 +55,13 @@
-      { "id": 29896, "logprob": -0.36694336, "special": false, "text": "1" },
+      { "id": 29896, "logprob": -0.3647461, "special": false, "text": "1" },
-      { "id": 29914, "logprob": -0.013114929, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.012901306, "special": false, "text": "/" },
@ -73,19 +73,20 @@
-      { "id": 29914, "logprob": -0.43847656, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.4362793, "special": false, "text": "/" },
-      { "id": 29896, "logprob": -1.9433594, "special": false, "text": "1" }
+      { "id": 29896, "logprob": -1.9394531, "special": false, "text": "1" }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "for /api/v1/projects/1"
+  "generated_text": " for /api/v1/projects/1"
 },
@ -105,7 +106,7 @@
     {
       "id": 2009,
-      "logprob": -11.5546875,
+      "logprob": -11.546875,
       "text": "request"
     }
   ],
@ -113,43 +114,43 @@
     "tokens": [
-      { "id": 363, "logprob": -1.5322266, "special": false, "text": " for" },
+      { "id": 363, "logprob": -1.5332031, "special": false, "text": " for" },
-      { "id": 847, "logprob": -2.5585938, "special": false, "text": " /" },
+      { "id": 847, "logprob": -2.5625, "special": false, "text": " /" },
-      { "id": 2754, "logprob": -2.265625, "special": false, "text": "api" },
+      { "id": 2754, "logprob": -2.2617188, "special": false, "text": "api" },
-      { "id": 29914, "logprob": -0.034088135, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.033996582, "special": false, "text": "/" },
-      { "id": 29894, "logprob": -0.96240234, "special": false, "text": "v" },
+      { "id": 29894, "logprob": -0.9609375, "special": false, "text": "v" },
-      { "id": 29896, "logprob": -0.36816406, "special": false, "text": "1" },
+      { "id": 29896, "logprob": -0.36572266, "special": false, "text": "1" },
-      { "id": 29914, "logprob": -0.013191223, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.0129776, "special": false, "text": "/" },
@ -161,19 +162,20 @@
-      { "id": 29914, "logprob": -0.43774414, "special": false, "text": "/" },
+      { "id": 29914, "logprob": -0.4362793, "special": false, "text": "/" },
-      { "id": 29896, "logprob": -1.9443359, "special": false, "text": "1" }
+      { "id": 29896, "logprob": -1.9394531, "special": false, "text": "1" }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "for /api/v1/projects/1"
+  "generated_text": " for /api/v1/projects/1"
 },
@ -193,7 +195,7 @@
@ -201,43 +203,43 @@
@ -249,19 +251,20 @@
@ -281,7 +284,7 @@
@ -289,43 +292,43 @@
@ -337,18 +340,19 @@
(The third and fourth responses in this snapshot receive updates identical to the second response above: the same "request" prefill logprob fix, the same token logprob refreshes, the added "top_tokens": null field, and the leading space added to "generated_text".)
 ]
@ -0,0 +1,89 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 3735, "logprob": -12.9140625, "text": "Test" },
      { "id": 2159, "logprob": -10.7578125, "text": "request" }
    ],
    "seed": null,
    "tokens": [
      { "id": 28747, "logprob": -0.54785156, "special": false, "text": ":" },
      { "id": 3169, "logprob": -1.4091797, "special": false, "text": " Let" },
      { "id": 307, "logprob": -3.0273438, "special": false, "text": " n" },
      { "id": 327, "logprob": -0.94433594, "special": false, "text": " =" },
      { "id": 28705, "logprob": -0.81347656, "special": false, "text": " " },
      { "id": 28740, "logprob": -1.2958984, "special": false, "text": "1" },
      { "id": 28734, "logprob": -2.0644531, "special": false, "text": "0" },
      { "id": 387, "logprob": -1.9580078, "special": false, "text": " -" },
      { "id": 28705, "logprob": -0.5073242, "special": false, "text": " " },
      { "id": 28740, "logprob": -1.1816406, "special": false, "text": "1" }
    ],
    "top_tokens": null
  },
  "generated_text": ": Let n = 10 - 1"
}
@ -0,0 +1,89 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      { "id": 1, "logprob": null, "text": "<s>" },
      { "id": 3735, "logprob": -12.9140625, "text": "Test" },
      { "id": 2159, "logprob": -10.7578125, "text": "request" }
    ],
    "seed": 0,
    "tokens": [
      { "id": 28747, "logprob": 0.0, "special": false, "text": ":" },
      { "id": 3169, "logprob": -0.1307373, "special": false, "text": " Let" },
      { "id": 332, "logprob": -2.3359375, "special": false, "text": " u" },
      { "id": 347, "logprob": 0.0, "special": false, "text": " be" },
      { "id": 325, "logprob": -1.0234375, "special": false, "text": " (" },
      { "id": 28734, "logprob": -2.0292969, "special": false, "text": "0" },
      { "id": 648, "logprob": -1.0439453, "special": false, "text": " +" },
      { "id": 28705, "logprob": -0.24499512, "special": false, "text": " " },
      { "id": 28770, "logprob": -0.5073242, "special": false, "text": "3" },
      { "id": 387, "logprob": -1.5507812, "special": false, "text": " -" }
    ],
    "top_tokens": null
  },
  "generated_text": "Test request: Let u be (0 + 3 -"
}
@ -0,0 +1,358 @@
[
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 3735, "logprob": -12.9140625, "text": "Test" },
        { "id": 2159, "logprob": -10.7578125, "text": "request" }
      ],
      "seed": null,
      "tokens": [
        { "id": 28747, "logprob": -0.55078125, "special": false, "text": ":" },
        { "id": 3169, "logprob": -1.4140625, "special": false, "text": " Let" },
        { "id": 307, "logprob": -3.0273438, "special": false, "text": " n" },
        { "id": 327, "logprob": -0.94140625, "special": false, "text": " =" },
        { "id": 28705, "logprob": -0.8173828, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.2978516, "special": false, "text": "1" },
        { "id": 28734, "logprob": -2.0664062, "special": false, "text": "0" },
        { "id": 387, "logprob": -1.9560547, "special": false, "text": " -" },
        { "id": 28705, "logprob": -0.5078125, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.1787109, "special": false, "text": "1" }
      ],
      "top_tokens": null
    },
    "generated_text": ": Let n = 10 - 1"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 3735, "logprob": -12.9140625, "text": "Test" },
        { "id": 2159, "logprob": -10.7578125, "text": "request" }
      ],
      "seed": null,
      "tokens": [
        { "id": 28747, "logprob": -0.54785156, "special": false, "text": ":" },
        { "id": 3169, "logprob": -1.4111328, "special": false, "text": " Let" },
        { "id": 307, "logprob": -3.0292969, "special": false, "text": " n" },
        { "id": 327, "logprob": -0.94433594, "special": false, "text": " =" },
        { "id": 28705, "logprob": -0.8178711, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.2939453, "special": false, "text": "1" },
        { "id": 28734, "logprob": -2.0644531, "special": false, "text": "0" },
        { "id": 387, "logprob": -1.9550781, "special": false, "text": " -" },
        { "id": 28705, "logprob": -0.5078125, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.1796875, "special": false, "text": "1" }
      ],
      "top_tokens": null
    },
    "generated_text": ": Let n = 10 - 1"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 3735, "logprob": -12.9140625, "text": "Test" },
        { "id": 2159, "logprob": -10.7578125, "text": "request" }
      ],
      "seed": null,
      "tokens": [
        { "id": 28747, "logprob": -0.55078125, "special": false, "text": ":" },
        { "id": 3169, "logprob": -1.4140625, "special": false, "text": " Let" },
        { "id": 307, "logprob": -3.0273438, "special": false, "text": " n" },
        { "id": 327, "logprob": -0.94140625, "special": false, "text": " =" },
        { "id": 28705, "logprob": -0.8173828, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.2978516, "special": false, "text": "1" },
        { "id": 28734, "logprob": -2.0664062, "special": false, "text": "0" },
        { "id": 387, "logprob": -1.9560547, "special": false, "text": " -" },
        { "id": 28705, "logprob": -0.5078125, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.1787109, "special": false, "text": "1" }
      ],
      "top_tokens": null
    },
    "generated_text": ": Let n = 10 - 1"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        { "id": 1, "logprob": null, "text": "<s>" },
        { "id": 3735, "logprob": -12.9140625, "text": "Test" },
        { "id": 2159, "logprob": -10.7578125, "text": "request" }
      ],
      "seed": null,
      "tokens": [
        { "id": 28747, "logprob": -0.55078125, "special": false, "text": ":" },
        { "id": 3169, "logprob": -1.4140625, "special": false, "text": " Let" },
        { "id": 307, "logprob": -3.0273438, "special": false, "text": " n" },
        { "id": 327, "logprob": -0.94140625, "special": false, "text": " =" },
        { "id": 28705, "logprob": -0.8173828, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.2978516, "special": false, "text": "1" },
        { "id": 28734, "logprob": -2.0664062, "special": false, "text": "0" },
        { "id": 387, "logprob": -1.9560547, "special": false, "text": " -" },
        { "id": 28705, "logprob": -0.5078125, "special": false, "text": " " },
        { "id": 28740, "logprob": -1.1787109, "special": false, "text": "1" }
      ],
      "top_tokens": null
    },
    "generated_text": ": Let n = 10 - 1"
  }
]
@ -11,22 +11,22 @@
   },
-    { "id": 4911, "logprob": -5.7773438, "text": "User" },
+    { "id": 4911, "logprob": -5.7851562, "text": "User" },
-    { "id": 29901, "logprob": -0.0069999695, "text": ":" },
+    { "id": 29901, "logprob": -0.006996155, "text": ":" },
-    { "id": 32000, "logprob": -0.8125, "text": "<fake_token_around_image>" },
+    { "id": 32000, "logprob": -0.81347656, "text": "<fake_token_around_image>" },
-    { "id": 32001, "logprob": -6.651878e-05, "text": "<image>" },
+    { "id": 32001, "logprob": -6.687641e-05, "text": "<image>" },
@ -36,67 +36,67 @@
-    { "id": 1815, "logprob": -4.2265625, "text": "Can" },
+    { "id": 1815, "logprob": -4.2148438, "text": "Can" },
-    { "id": 366, "logprob": -0.013977051, "text": "you" },
+    { "id": 366, "logprob": -0.014137268, "text": "you" },
-    { "id": 2649, "logprob": -4.4375, "text": "tell" },
+    { "id": 2649, "logprob": -4.4335938, "text": "tell" },
-    { "id": 592, "logprob": -0.29077148, "text": "me" },
+    { "id": 592, "logprob": -0.2919922, "text": "me" },
-    { "id": 263, "logprob": -4.2109375, "text": "a" },
+    { "id": 263, "logprob": -4.2070312, "text": "a" },
-    { "id": 1407, "logprob": -9.4296875, "text": "very" },
+    { "id": 1407, "logprob": -9.421875, "text": "very" },
-    { "id": 3273, "logprob": -1.8671875, "text": "short" },
+    { "id": 3273, "logprob": -1.8720703, "text": "short" },
-    { "id": 5828, "logprob": -0.26586914, "text": "story" },
+    { "id": 5828, "logprob": -0.26489258, "text": "story" },
-    { "id": 2729, "logprob": -3.7460938, "text": "based" },
+    { "id": 2729, "logprob": -3.7441406, "text": "based" },
-    { "id": 373, "logprob": -0.0005350113, "text": "on" },
+    { "id": 373, "logprob": -0.0005393028, "text": "on" },
-    { "id": 278, "logprob": -0.13867188, "text": "the" },
+    { "id": 278, "logprob": -0.140625, "text": "the" },
-    { "id": 1967, "logprob": -0.06842041, "text": "image" },
+    { "id": 1967, "logprob": -0.06756592, "text": "image" },
-    { "id": 29973, "logprob": -0.15319824, "text": "?" }
+    { "id": 29973, "logprob": -0.15454102, "text": "?" }
   ],
@ -104,7 +104,7 @@
   "tokens": [
-    { "id": 32002, "logprob": -0.0019445419, "special": true, "text": "<end_of_utterance>" },
+    { "id": 32002, "logprob": -0.0019140244, "special": true, "text": "<end_of_utterance>" },
@ -116,13 +116,13 @@
-    { "id": 13, "logprob": -1.7881393e-05, "special": false, "text": "\n" },
+    { "id": 13, "logprob": -1.7642975e-05, "special": false, "text": "\n" },
-    { "id": 7900, "logprob": -3.0994415e-06, "special": false, "text": "Ass" },
+    { "id": 7900, "logprob": -2.9802322e-06, "special": false, "text": "Ass" },
@ -140,30 +140,30 @@
-    { "id": 319, "logprob": -0.9057617, "special": false, "text": " A" },
+    { "id": 319, "logprob": -0.91064453, "special": false, "text": " A" },
-    { "id": 696, "logprob": -1.2314453, "special": false, "text": " ro" },
+    { "id": 696, "logprob": -1.2412109, "special": false, "text": " ro" },
-    { "id": 15664, "logprob": -0.00024914742, "special": false, "text": "oster" },
+    { "id": 15664, "logprob": -0.0002439022, "special": false, "text": "oster" },
-    { "id": 15028, "logprob": -1.1621094, "special": false, "text": " stands" }
+    { "id": 15028, "logprob": -1.1630859, "special": false, "text": " stands" }
   ],
   "top_tokens": null
   },
-  "generated_text": "\nAssistant: A rooster stands"
+  "generated_text": " \nAssistant: A rooster stands"
 }
@ -1,4 +1,173 @@
 [
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        { "id": 1, "logprob": null, "text": "<s>" },
+        { "id": 4911, "logprob": -5.7851562, "text": "User" },
+        { "id": 29901, "logprob": -0.006996155, "text": ":" },
+        { "id": 32000, "logprob": -0.81347656, "text": "<fake_token_around_image>" },
+        { "id": 32001, "logprob": -6.687641e-05, "text": "<image>" },
+        { "id": 32000, "logprob": -3.5762787e-07, "text": "<fake_token_around_image>" },
+        { "id": 1815, "logprob": -4.2148438, "text": "Can" },
+        { "id": 366, "logprob": -0.014137268, "text": "you" },
+        { "id": 2649, "logprob": -4.4335938, "text": "tell" },
+        { "id": 592, "logprob": -0.2919922, "text": "me" },
+        { "id": 263, "logprob": -4.2070312, "text": "a" },
+        { "id": 1407, "logprob": -9.421875, "text": "very" },
+        { "id": 3273, "logprob": -1.8720703, "text": "short" },
+        { "id": 5828, "logprob": -0.26489258, "text": "story" },
+        { "id": 2729, "logprob": -3.7441406, "text": "based" },
+        { "id": 373, "logprob": -0.0005393028, "text": "on" },
+        { "id": 278, "logprob": -0.140625, "text": "the" },
+        { "id": 1967, "logprob": -0.06756592, "text": "image" },
+        { "id": 29973, "logprob": -0.15454102, "text": "?" }
+      ],
+      "seed": null,
+      "tokens": [
+        { "id": 32002, "logprob": -0.0019140244, "special": true, "text": "<end_of_utterance>" },
+        { "id": 29871, "logprob": -8.392334e-05, "special": false, "text": " " },
+        { "id": 13, "logprob": -1.7881393e-05, "special": false, "text": "\n" },
+        { "id": 7900, "logprob": -2.9802322e-06, "special": false, "text": "Ass" },
+        { "id": 22137, "logprob": 0.0, "special": false, "text": "istant" },
+        { "id": 29901, "logprob": -3.0994415e-06, "special": false, "text": ":" },
+        { "id": 319, "logprob": -0.9057617, "special": false, "text": " A" },
+        { "id": 696, "logprob": -1.2294922, "special": false, "text": " ro" },
+        { "id": 15664, "logprob": -0.00024533272, "special": false, "text": "oster" },
+        { "id": 15028, "logprob": -1.1640625, "special": false, "text": " stands" }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " \nAssistant: A rooster stands"
+  },
   {
     "details": {
       "best_of_sequences": null,
@ -17,17 +186,17 @@
-    { "id": 29901, "logprob": -0.0069999695, "text": ":" },
+    { "id": 29901, "logprob": -0.0070114136, "text": ":" },
-    { "id": 32000, "logprob": -0.8125, "text": "<fake_token_around_image>" },
+    { "id": 32000, "logprob": -0.8208008, "text": "<fake_token_around_image>" },
-    { "id": 32001, "logprob": -6.651878e-05, "text": "<image>" },
+    { "id": 32001, "logprob": -6.699562e-05, "text": "<image>" },
@ -42,17 +211,17 @@
-    { "id": 366, "logprob": -0.013977051, "text": "you" },
+    { "id": 366, "logprob": -0.014175415, "text": "you" },
-    { "id": 2649, "logprob": -4.4375, "text": "tell" },
+    { "id": 2649, "logprob": -4.4296875, "text": "tell" },
-    { "id": 592, "logprob": -0.29077148, "text": "me" },
+    { "id": 592, "logprob": -0.29516602, "text": "me" },
@ -67,37 +236,37 @@
-    { "id": 3273, "logprob": -1.8671875, "text": "short" },
+    { "id": 3273, "logprob": -1.8720703, "text": "short" },
-    { "id": 5828, "logprob": -0.26586914, "text": "story" },
+    { "id": 5828, "logprob": -0.26879883, "text": "story" },
-    { "id": 2729, "logprob": -3.7460938, "text": "based" },
+    { "id": 2729, "logprob": -3.7675781, "text": "based" },
-    { "id": 373, "logprob": -0.0005350113, "text": "on" },
+    { "id": 373, "logprob": -0.0005354881, "text": "on" },
-    { "id": 278, "logprob": -0.13867188, "text": "the" },
+    { "id": 278, "logprob": -0.13671875, "text": "the" },
-    { "id": 1967, "logprob": -0.06842041, "text": "image" },
+    { "id": 1967, "logprob": -0.06719971, "text": "image" },
-    { "id": 29973, "logprob": -0.15319824, "text": "?" }
+    { "id": 29973, "logprob": -0.15551758, "text": "?" }
@ -105,13 +274,13 @@
-    { "id": 32002, "logprob": -0.0019445419, "special": true, "text": "<end_of_utterance>" },
+    { "id": 32002, "logprob": -0.0019130707, "special": true, "text": "<end_of_utterance>" },
-    { "id": 29871, "logprob": -8.416176e-05, "special": false, "text": " " },
+    { "id": 29871, "logprob": -8.392334e-05, "special": false, "text": " " },
@ -135,25 +304,25 @@
-    { "id": 29901, "logprob": -3.2186508e-06, "special": false, "text": ":" },
+    { "id": 29901, "logprob": -3.0994415e-06, "special": false, "text": ":" },
-    { "id": 319, "logprob": -0.89941406, "special": false, "text": " A" },
+    { "id": 319, "logprob": -0.9013672, "special": false, "text": " A" },
-    { "id": 696, "logprob": -1.234375, "special": false, "text": " ro" },
+    { "id": 696, "logprob": -1.2324219, "special": false, "text": " ro" },
-    { "id": 15664, "logprob": -0.0002465248, "special": false, "text": "oster" },
+    { "id": 15664, "logprob": -0.0002477169, "special": false, "text": "oster" },
@ -166,7 +335,7 @@
   "top_tokens": null
   },
-  "generated_text": "\nAssistant: A rooster stands"
+  "generated_text": " \nAssistant: A rooster stands"
 },
@ -181,22 +350,22 @@
-    { "id": 4911, "logprob": -5.7890625, "text": "User" },
+    { "id": 4911, "logprob": -5.7773438, "text": "User" },
-    { "id": 29901, "logprob": -0.0070152283, "text": ":" },
+    { "id": 29901, "logprob": -0.0070114136, "text": ":" },
-    { "id": 32000, "logprob": -0.8125, "text": "<fake_token_around_image>" },
+    { "id": 32000, "logprob": -0.8208008, "text": "<fake_token_around_image>" },
-    { "id": 32001, "logprob": -6.651878e-05, "text": "<image>" },
+    { "id": 32001, "logprob": -6.699562e-05, "text": "<image>" },
@ -211,17 +380,17 @@
-    { "id": 366, "logprob": -0.014190674, "text": "you" },
+    { "id": 366, "logprob": -0.014175415, "text": "you" },
-    { "id": 2649, "logprob": -4.4140625, "text": "tell" },
+    { "id": 2649, "logprob": -4.4296875, "text": "tell" },
-    { "id": 592, "logprob": -0.2919922, "text": "me" },
+    { "id": 592, "logprob": -0.29516602, "text": "me" },
@ -231,7 +400,7 @@
-    { "id": 1407, "logprob": -9.4375, "text": "very" },
+    { "id": 1407, "logprob": -9.4296875, "text": "very" },
@ -241,7 +410,7 @@
-    { "id": 5828, "logprob": -0.26904297, "text": "story" },
+    { "id": 5828, "logprob": -0.26879883, "text": "story" },
@ -251,22 +420,22 @@
-    { "id": 373, "logprob": -0.0005402565, "text": "on" },
+    { "id": 373, "logprob": -0.0005354881, "text": "on" },
-    { "id": 278, "logprob": -0.13867188, "text": "the" },
+    { "id": 278, "logprob": -0.13671875, "text": "the" },
-    { "id": 1967, "logprob": -0.068359375, "text": "image" },
+    { "id": 1967, "logprob": -0.06719971, "text": "image" },
-    { "id": 29973, "logprob": -0.15539551, "text": "?" }
+    { "id": 29973, "logprob": -0.15551758, "text": "?" }
@ -274,7 +443,7 @@
-    { "id": 32002, "logprob": -0.0019168854, "special": true, "text": "<end_of_utterance>" },
+    { "id": 32002, "logprob": -0.001912117, "special": true, "text": "<end_of_utterance>" },
@ -286,7 +455,7 @@
-    { "id": 13, "logprob": -1.7642975e-05, "special": false, "text": "\n" },
+    { "id": 13, "logprob": -1.7762184e-05, "special": false, "text": "\n" },
@ -310,32 +479,32 @@
-    { "id": 319, "logprob": -0.90722656, "special": false, "text": " A" },
+    { "id": 319, "logprob": -0.9013672, "special": false, "text": " A" },
-    { "id": 696, "logprob": -1.2373047, "special": false, "text": " ro" },
+    { "id": 696, "logprob": -1.2324219, "special": false, "text": " ro" },
-    { "id": 15664, "logprob": -0.00024938583, "special": false, "text": "oster" },
+    { "id": 15664, "logprob": -0.0002477169, "special": false, "text": "oster" },
-    { "id": 15028, "logprob": -1.1708984, "special": false, "text": " stands" }
+    { "id": 15028, "logprob": -1.1660156, "special": false, "text": " stands" }
-  "generated_text": "\nAssistant: A rooster stands"
+  "generated_text": " \nAssistant: A rooster stands"
@ -443,19 +612,19 @@
-    { "id": 32002, "logprob": -0.0019168854, "special": true, "text": "<end_of_utterance>" },
+    { "id": 32002, "logprob": -0.001912117, "special": true, "text": "<end_of_utterance>" },
-    { "id": 29871, "logprob": -8.404255e-05, "special": false, "text": " " },
+    { "id": 29871, "logprob": -8.392334e-05, "special": false, "text": " " },
-    { "id": 13, "logprob": -1.7642975e-05, "special": false, "text": "\n" },
+    { "id": 13, "logprob": -1.7762184e-05, "special": false, "text": "\n" },
@ -479,200 +648,31 @@
-    { "id": 319, "logprob": -0.90722656, "special": false, "text": " A" },
+    { "id": 319, "logprob": -0.9013672, "special": false, "text": " A" },
-    { "id": 696, "logprob": -1.2373047, "special": false, "text": " ro" },
+    { "id": 696, "logprob": -1.2324219, "special": false, "text": " ro" },
-    { "id": 15664, "logprob": -0.00024938583, "special": false, "text": "oster" },
+    { "id": 15664, "logprob": -0.0002477169, "special": false, "text": "oster" },
-    { "id": 15028, "logprob": -1.1708984, "special": false, "text": " stands" }
+    { "id": 15028, "logprob": -1.1660156, "special": false, "text": " stands" }
   ],
   "top_tokens": null
   },
-  "generated_text": "\nAssistant: A rooster stands"
+  "generated_text": " \nAssistant: A rooster stands"
-  },
-  {
-    "details": {
-      "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
-      "prefill": [
-        { "id": 1, "logprob": null, "text": "<s>" },
-        { "id": 4911, "logprob": -5.7890625, "text": "User" },
-        { "id": 29901, "logprob": -0.0070152283, "text": ":" },
-        { "id": 32000, "logprob": -0.8125, "text": "<fake_token_around_image>" },
-        { "id": 32001, "logprob": -6.663799e-05, "text": "<image>" },
-        { "id": 32000, "logprob": -3.5762787e-07, "text": "<fake_token_around_image>" },
-        { "id": 1815, "logprob": -4.2265625, "text": "Can" },
-        { "id": 366, "logprob": -0.014190674, "text": "you" },
-        { "id": 2649, "logprob": -4.4140625, "text": "tell" },
-        { "id": 592, "logprob": -0.2919922, "text": "me" },
-        { "id": 263, "logprob": -4.2109375, "text": "a" },
-        { "id": 1407, "logprob": -9.4375, "text": "very" },
-        { "id": 3273, "logprob": -1.8720703, "text": "short" },
-        { "id": 5828, "logprob": -0.26904297, "text": "story" },
-        { "id": 2729, "logprob": -3.7675781, "text": "based" },
-        { "id": 373, "logprob": -0.0005402565, "text": "on" },
-        { "id": 278, "logprob": -0.13867188, "text": "the" },
-        { "id": 1967, "logprob": -0.068359375, "text": "image" },
-        { "id": 29973, "logprob": -0.15539551, "text": "?" }
-      ],
-      "seed": null,
-      "tokens": [
-        { "id": 32002, "logprob": -0.0019159317, "special": true, "text": "<end_of_utterance>" },
-        { "id": 29871, "logprob": -8.404255e-05, "special": false, "text": " " },
-        { "id": 13, "logprob": -1.7642975e-05, "special": false, "text": "\n" },
-        { "id": 7900, "logprob": -3.0994415e-06, "special": false, "text": "Ass" },
-        { "id": 22137, "logprob": 0.0, "special": false, "text": "istant" },
-        { "id": 29901, "logprob": -3.0994415e-06, "special": false, "text": ":" },
-        { "id": 319, "logprob": -0.90722656, "special": false, "text": " A" },
-        { "id": 696, "logprob": -1.2373047, "special": false, "text": " ro" },
-        { "id": 15664, "logprob": -0.00024938583, "special": false, "text": "oster" },
-        { "id": 15028, "logprob": -1.1708984, "special": false, "text": " stands" }
-      ],
-      "top_tokens": null
-    },
-    "generated_text": "\nAssistant: A rooster stands"
-  }
+  }
 ]
73
integration-tests/models/test_flash_awq.py
Normal file
@ -0,0 +1,73 @@
import pytest


@pytest.fixture(scope="module")
def flash_llama_awq_handle(launcher):
    with launcher(
        "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq",
        num_shard=1,
        quantize="awq",
    ) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_llama_awq(flash_llama_awq_handle):
    await flash_llama_awq_handle.health(300)
    return flash_llama_awq_handle.client


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
    response = await flash_llama_awq.generate(
        "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "\nWhat is the difference between Deep Learning and Machine"
    )
    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
    response = await flash_llama_awq.generate(
        "What is Deep Learning?",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load(flash_llama_awq, generate_load, response_snapshot):
    responses = await generate_load(
        flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
    assert all(
        [
            r.generated_text
            == "\nWhat is the difference between Deep Learning and Machine"
            for r in responses
        ]
    )

    assert responses == response_snapshot
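The fixtures above rely on the suite's launcher helper (defined in the integration tests' conftest, which is not part of this diff) to boot a server for the quantized checkpoint. As a rough sketch of what that amounts to outside pytest — the CLI flags mirror the fixture arguments, and the port is an assumption for illustration:

    # Illustrative only: roughly what the launcher fixture automates for
    # these tests; treat the exact invocation details as assumptions.
    import subprocess

    server = subprocess.Popen(
        [
            "text-generation-launcher",
            "--model-id", "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq",
            "--num-shard", "1",
            "--quantize", "awq",
            "--port", "8080",
        ]
    )
    # ... poll the server's health endpoint until it is ready, run the
    # generate requests, then shut the server down:
    server.terminate()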
53
integration-tests/models/test_flash_awq_sharded.py
Normal file
@ -0,0 +1,53 @@
import pytest


@pytest.fixture(scope="module")
def flash_llama_awq_handle_sharded(launcher):
    with launcher(
        "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq",
        num_shard=2,
        quantize="awq",
    ) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
    await flash_llama_awq_handle_sharded.health(300)
    return flash_llama_awq_handle_sharded.client


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
    response = await flash_llama_awq_sharded.generate(
        "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "\nWhat is the difference between Deep Learning and Machine"
    )
    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load_sharded(
    flash_llama_awq_sharded, generate_load, response_snapshot
):
    responses = await generate_load(
        flash_llama_awq_sharded, "What is Deep Learning?", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
    assert all(
        [
            r.generated_text
            == "\nWhat is the difference between Deep Learning and Machine"
            for r in responses
        ]
    )

    assert responses == response_snapshot
60
integration-tests/models/test_flash_mistral.py
Normal file
@@ -0,0 +1,60 @@
import pytest


@pytest.fixture(scope="module")
def flash_mistral_handle(launcher):
    with launcher("mistralai/Mistral-7B-Instruct-v0.1") as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_mistral(flash_mistral_handle):
    await flash_mistral_handle.health(300)
    return flash_mistral_handle.client


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_mistral(flash_mistral, response_snapshot):
    response = await flash_mistral.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_mistral_all_params(flash_mistral, response_snapshot):
    response = await flash_mistral.generate(
        "Test request",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_mistral_load(flash_mistral, generate_load, response_snapshot):
    responses = await generate_load(
        flash_mistral, "Test request", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
    assert all([r.generated_text == responses[0].generated_text for r in responses])

    assert responses == response_snapshot
@@ -3,9 +3,7 @@ import pytest

@pytest.fixture(scope="module")
def idefics_handle(launcher):
-    with launcher(
-        "HuggingFaceM4/idefics-9b-instruct", num_shard=2
-    ) as handle:
+    with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2) as handle:
        yield handle
@@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
-version = "1.0.3"
+version = "1.1.0"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
@@ -7,17 +7,17 @@ authors.workspace = true
homepage.workspace = true

[dependencies]
-clap = { version = "4.1.4", features = ["derive", "env"] }
+clap = { version = "4.4.5", features = ["derive", "env"] }
-ctrlc = { version = "3.2.5", features = ["termination"] }
+ctrlc = { version = "3.4.1", features = ["termination"] }
-nix = "0.26.2"
+nix = "0.27.1"
-serde = { version = "1.0.152", features = ["derive"] }
+serde = { version = "1.0.188", features = ["derive"] }
-serde_json = "1.0.93"
+serde_json = "1.0.107"
tracing = "0.1.37"
-tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
+tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }

[dev-dependencies]
float_eq = "1.0.1"
-reqwest = { version = "0.11.14", features = ["blocking", "json"] }
+reqwest = { version = "0.11.20", features = ["blocking", "json"] }

[build-dependencies]
-vergen = { version = "8.0.0", features = ["build", "cargo", "git", "gitcl", "rustc", "si"] }
+vergen = { version = "8.2.5", features = ["build", "cargo", "git", "gitcl", "rustc", "si"] }
@@ -21,10 +21,32 @@ mod env_runtime;

#[derive(Clone, Copy, Debug, ValueEnum)]
enum Quantization {
-    Bitsandbytes,
-    BitsandbytesNF4,
-    BitsandbytesFP4,
+    /// 4 bit quantization. Requires a specific AWQ quantized model:
+    /// https://hf.co/models?search=awq.
+    /// Should replace GPTQ models wherever possible because of the better latency
+    Awq,
+    /// 8 bit quantization, doesn't require a specific model.
+    /// Should be a drop-in replacement to bitsandbytes with much better performance.
+    /// Kernels are from https://github.com/NetEase-FuXi/EETQ.git
+    Eetq,
+    /// 4 bit quantization. Requires a specific GPTQ quantized model: https://hf.co/models?search=gptq.
+    /// text-generation-inference will use exllama (faster) kernels wherever possible, and use
+    /// the triton kernel (wider support) when it's not.
+    /// AWQ has faster kernels.
    Gptq,
+    /// Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half,
+    /// but it is known that the model will be much slower to run than the native f16.
+    #[deprecated(
+        since = "1.1.0",
+        note = "Use `eetq` instead, which provides better latencies overall and is drop-in in most cases"
+    )]
+    Bitsandbytes,
+    /// Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x,
+    /// but it is known that the model will be much slower to run than the native f16.
+    BitsandbytesNF4,
+    /// Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better
+    /// perplexity performance for your model
+    BitsandbytesFP4,
}

impl std::fmt::Display for Quantization {
@@ -43,6 +65,12 @@ impl std::fmt::Display for Quantization {
            Quantization::Gptq => {
                write!(f, "gptq")
            }
+            Quantization::Awq => {
+                write!(f, "awq")
+            }
+            Quantization::Eetq => {
+                write!(f, "eetq")
+            }
        }
    }
}
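The handwritten `Display` impl is what turns the parsed enum back into the lowercase token the launcher forwards to the Python server (e.g. `--quantize eetq`). A minimal sketch of that forwarding; `shard_quantize_args` is a hypothetical helper for illustration, not the launcher's actual code:

// Hypothetical helper: Display yields the exact string the server-side
// `--quantize` flag expects, so forwarding is a straight to_string().
fn shard_quantize_args(quantize: Option<Quantization>) -> Vec<String> {
    match quantize {
        Some(q) => vec!["--quantize".to_string(), q.to_string()],
        None => Vec::new(),
    }
}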
@@ -123,9 +151,7 @@ struct Args {
    #[clap(long, env)]
    num_shard: Option<usize>,

-    /// Whether you want the model to be quantized. This will use `bitsandbytes` for
-    /// quantization on the fly, or `gptq`. 4bit quantization is available through
-    /// `bitsandbytes` by providing the `bitsandbytes-fp4` or `bitsandbytes-nf4` options.
+    /// Whether you want the model to be quantized.
    #[clap(long, env, value_enum)]
    quantize: Option<Quantization>,
@@ -31,6 +31,7 @@ message InfoResponse {
    bool requires_padding = 1;
    string dtype = 2;
    string device_type = 3;
+    optional uint32 window_size = 4;
}

/// Empty request
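For reference, proto3 `optional` scalar fields surface as `Option<_>` on the Rust side, which is why the router changes below can thread the new field around as `Option<u32>`. Roughly the shape prost generates for this message (a trimmed sketch, not the actual generated file):

// Sketch of the prost-generated struct; derive/attribute noise omitted.
pub struct InfoResponse {
    pub requires_padding: bool,
    pub dtype: String,
    pub device_type: String,
    pub window_size: Option<u32>,
}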
@@ -15,35 +15,37 @@ name = "text-generation-router"
path = "src/main.rs"

[dependencies]
-async-stream = "0.3.3"
+async-stream = "0.3.5"
-axum = { version = "0.6.4", features = ["json"] }
+axum = { version = "0.6.20", features = ["json"] }
-axum-tracing-opentelemetry = "0.10.0"
+axum-tracing-opentelemetry = "0.14.1"
text-generation-client = { path = "client" }
-clap = { version = "4.1.4", features = ["derive", "env"] }
+clap = { version = "4.4.5", features = ["derive", "env"] }
-flume = "0.10.14"
+flume = "0.11.0"
-futures = "0.3.26"
+futures = "0.3.28"
-metrics = "0.21.0"
+metrics = "0.21.1"
metrics-exporter-prometheus = { version = "0.12.1", features = [] }
nohash-hasher = "0.2.0"
-opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
+opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
-opentelemetry-otlp = "0.12.0"
+opentelemetry-otlp = "0.13.0"
rand = "0.8.5"
-reqwest = { version = "0.11.14", features = [] }
+reqwest = { version = "0.11.20", features = [] }
-serde = "1.0.152"
+serde = "1.0.188"
-serde_json = "1.0.93"
+serde_json = "1.0.107"
-thiserror = "1.0.38"
+thiserror = "1.0.48"
-tokenizers = "0.13.3"
+tokenizers = { version = "0.14.0", features = ["http"] }
-tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
+tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
-tower-http = { version = "0.4.0", features = ["cors"] }
+tower-http = { version = "0.4.4", features = ["cors"] }
tracing = "0.1.37"
-tracing-opentelemetry = "0.19.0"
+tracing-opentelemetry = "0.21.0"
-tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] }
+tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
-utoipa = { version = "3.0.1", features = ["axum_extras"] }
+utoipa = { version = "3.5.0", features = ["axum_extras"] }
-utoipa-swagger-ui = { version = "3.0.2", features = ["axum"] }
+utoipa-swagger-ui = { version = "3.1.5", features = ["axum"] }
-ngrok = { version = "0.12.3", features = ["axum"], optional = true }
+ngrok = { version = "0.13.1", features = ["axum"], optional = true }
+hf-hub = "0.3.1"
+init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }

[build-dependencies]
-vergen = { version = "8.0.0", features = ["build", "git", "gitcl"] }
+vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }

[features]
default = ["ngrok"]
@@ -8,13 +8,13 @@ homepage.workspace = true
[dependencies]
futures = "^0.3"
grpc-metadata = { path = "../grpc-metadata" }
-prost = "^0.11"
+prost = "^0.12"
thiserror = "^1.0"
-tokio = { version = "^1.25", features = ["sync"] }
+tokio = { version = "^1.32", features = ["sync"] }
-tonic = "^0.9"
+tonic = "^0.10"
tower = "^0.4"
tracing = "^0.1"

[build-dependencies]
-tonic-build = "0.9.2"
+tonic-build = "0.10.1"
-prost-build = "0.11.6"
+prost-build = "0.12.1"
@@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2021"

[dependencies]
-opentelemetry = "^0.19"
+opentelemetry = "^0.20"
-tonic = "^0.9"
+tonic = "^0.10"
tracing = "^0.1"
-tracing-opentelemetry = "^0.19"
+tracing-opentelemetry = "^0.21"
@@ -50,10 +50,11 @@ impl Infer {
        max_waiting_tokens: usize,
        max_concurrent_requests: usize,
        requires_padding: bool,
+        window_size: Option<u32>,
        generation_health: Arc<AtomicBool>,
    ) -> Self {
        // Infer shared state
-        let queue = Queue::new(requires_padding, 16);
+        let queue = Queue::new(requires_padding, 16, window_size);
        let shared = Arc::new(Shared {
            batching_task: Notify::new(),
        });
@@ -324,7 +324,7 @@ fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {

    if let Ok(tracer) = tracer {
        layers.push(tracing_opentelemetry::layer().with_tracer(tracer).boxed());
-        axum_tracing_opentelemetry::init_propagator().unwrap();
+        init_tracing_opentelemetry::init_propagator().unwrap();
    };
}
@@ -2,6 +2,7 @@ use crate::infer::InferError;
use crate::infer::InferStreamResponse;
use crate::validation::ValidGenerateRequest;
use nohash_hasher::{BuildNoHashHasher, IntMap};
+use std::cmp::min;
use std::collections::VecDeque;
use text_generation_client::{Batch, Request};
use tokio::sync::oneshot;
@@ -33,12 +34,17 @@ pub(crate) struct Queue {
}

impl Queue {
-    pub(crate) fn new(requires_padding: bool, block_size: u32) -> Self {
+    pub(crate) fn new(requires_padding: bool, block_size: u32, window_size: Option<u32>) -> Self {
        // Create channel
        let (queue_sender, queue_receiver) = flume::unbounded();

        // Launch background queue task
-        tokio::spawn(queue_task(requires_padding, block_size, queue_receiver));
+        tokio::spawn(queue_task(
+            requires_padding,
+            block_size,
+            window_size,
+            queue_receiver,
+        ));

        Self { queue_sender }
    }
@@ -84,9 +90,10 @@ impl Queue {
async fn queue_task(
    requires_padding: bool,
    block_size: u32,
+    window_size: Option<u32>,
    receiver: flume::Receiver<QueueCommand>,
) {
-    let mut state = State::new(requires_padding, block_size);
+    let mut state = State::new(requires_padding, block_size, window_size);

    while let Ok(cmd) = receiver.recv_async().await {
        match cmd {
@@ -126,16 +133,20 @@ struct State {

    /// Paged Attention block size
    block_size: u32,
+
+    /// Sliding window
+    window_size: Option<u32>,
}

impl State {
-    fn new(requires_padding: bool, block_size: u32) -> Self {
+    fn new(requires_padding: bool, block_size: u32, window_size: Option<u32>) -> Self {
        Self {
            entries: VecDeque::with_capacity(128),
            next_id: 0,
            next_batch_id: 0,
            requires_padding,
            block_size,
+            window_size,
        }
    }
@@ -204,11 +215,17 @@ impl State {
            if self.requires_padding {
                decode_tokens += entry.request.stopping_parameters.max_new_tokens;
            } else {
+                let max_new_tokens = match self.window_size {
+                    None => entry.request.stopping_parameters.max_new_tokens,
+                    Some(window_size) => min(
+                        window_size.saturating_sub(entry.request.input_length),
+                        entry.request.stopping_parameters.max_new_tokens,
+                    ),
+                };
+
                // pad to block size
-                decode_tokens +=
-                    ((entry.request.stopping_parameters.max_new_tokens + self.block_size - 1)
-                        / self.block_size)
-                        * self.block_size;
+                decode_tokens +=
+                    ((max_new_tokens + self.block_size - 1) / self.block_size) * self.block_size;
            }

            if prefill_tokens > prefill_token_budget
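The hunk above is the core of the new sliding-window support: when a model only attends over a bounded window, at most `window_size - input_length` generated tokens can require fresh KV-cache blocks, so the scheduler caps its reservation there instead of at `max_new_tokens`, then rounds up to whole Paged Attention blocks. A self-contained sketch of that arithmetic (the free function and the `main` checks are illustrative only; the real logic lives inside `State::next_batch`):

use std::cmp::min;

// Illustrative sketch of the budget computation above, pulled out of `State`.
fn padded_decode_tokens(
    window_size: Option<u32>,
    input_length: u32,
    max_new_tokens: u32,
    block_size: u32,
) -> u32 {
    let max_new_tokens = match window_size {
        None => max_new_tokens,
        // With a sliding window the KV cache never holds more than
        // `window_size` tokens, so only `window_size - input_length` new
        // tokens can need fresh blocks (0 if the prompt already fills it).
        Some(window_size) => min(window_size.saturating_sub(input_length), max_new_tokens),
    };
    // Round up to a whole number of Paged Attention blocks.
    ((max_new_tokens + block_size - 1) / block_size) * block_size
}

fn main() {
    // No window: reserve all 512 requested tokens (already block-aligned).
    assert_eq!(padded_decode_tokens(None, 4000, 512, 16), 512);
    // 4096-token window, 4000-token prompt: only 96 tokens need new blocks.
    assert_eq!(padded_decode_tokens(Some(4096), 4000, 512, 16), 96);
}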
@@ -342,7 +359,7 @@ mod tests {

    #[test]
    fn test_append() {
-        let mut state = State::new(false, 1);
+        let mut state = State::new(false, 1, None);
        let (entry, _guard) = default_entry();

        assert_eq!(state.next_id, 0);
@@ -358,7 +375,7 @@ mod tests {

    #[test]
    fn test_next_batch_empty() {
-        let mut state = State::new(false, 1);
+        let mut state = State::new(false, 1, None);

        assert!(state.next_batch(None, 1, 1).is_none());
        assert!(state.next_batch(Some(1), 1, 1).is_none());
@@ -366,7 +383,7 @@ mod tests {

    #[test]
    fn test_next_batch_min_size() {
-        let mut state = State::new(false, 1);
+        let mut state = State::new(false, 1, None);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        state.append(entry1);
@@ -398,7 +415,7 @@ mod tests {

    #[test]
    fn test_next_batch_token_budget() {
-        let mut state = State::new(false, 1);
+        let mut state = State::new(false, 1, None);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        state.append(entry1);
@@ -431,14 +448,14 @@ mod tests {

    #[tokio::test]
    async fn test_queue_append() {
-        let queue = Queue::new(false, 1);
+        let queue = Queue::new(false, 1, None);
        let (entry, _guard) = default_entry();
        queue.append(entry);
    }

    #[tokio::test]
    async fn test_queue_next_batch_empty() {
-        let queue = Queue::new(false, 1);
+        let queue = Queue::new(false, 1, None);

        assert!(queue.next_batch(None, 1, 1).await.is_none());
        assert!(queue.next_batch(Some(1), 1, 1).await.is_none());
@@ -446,7 +463,7 @@ mod tests {

    #[tokio::test]
    async fn test_queue_next_batch_min_size() {
-        let queue = Queue::new(false, 1);
+        let queue = Queue::new(false, 1, None);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
@@ -479,7 +496,7 @@ mod tests {

    #[tokio::test]
    async fn test_queue_next_batch_token_budget() {
-        let queue = Queue::new(false, 1);
+        let queue = Queue::new(false, 1, None);
        let (entry1, _guard1) = default_entry();
        let (entry2, _guard2) = default_entry();
        queue.append(entry1);
@@ -504,7 +521,7 @@ mod tests {

    #[tokio::test]
    async fn test_queue_next_batch_dropped_receiver() {
-        let queue = Queue::new(false, 1);
+        let queue = Queue::new(false, 1, None);
        let (entry, _) = default_entry();
        queue.append(entry);
@@ -13,7 +13,7 @@ use axum::response::sse::{Event, KeepAlive, Sse};
use axum::response::{IntoResponse, Response};
use axum::routing::{get, post};
use axum::{http, Json, Router};
-use axum_tracing_opentelemetry::opentelemetry_tracing_layer;
+use axum_tracing_opentelemetry::middleware::OtelAxumLayer;
use futures::stream::StreamExt;
use futures::Stream;
use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
@@ -396,7 +396,7 @@ async fn generate_stream(
                // StreamResponse
                let stream_token = StreamResponse {
                    token,
-                    top_tokens: top_tokens,
+                    top_tokens,
                    generated_text: None,
                    details: None,
                };
@@ -458,7 +458,7 @@ async fn generate_stream(

                let stream_token = StreamResponse {
                    token,
-                    top_tokens: top_tokens,
+                    top_tokens,
                    generated_text: Some(output_text),
                    details,
                };
@@ -595,6 +595,7 @@ pub async fn run(
        max_waiting_tokens,
        max_concurrent_requests,
        shard_info.requires_padding,
+        shard_info.window_size,
        generation_health,
    );
|
|||||||
.layer(Extension(compat_return_full_text))
|
.layer(Extension(compat_return_full_text))
|
||||||
.layer(Extension(infer))
|
.layer(Extension(infer))
|
||||||
.layer(Extension(prom_handle.clone()))
|
.layer(Extension(prom_handle.clone()))
|
||||||
.layer(opentelemetry_tracing_layer())
|
.layer(OtelAxumLayer::default())
|
||||||
.layer(cors_layer);
|
.layer(cors_layer);
|
||||||
|
|
||||||
if ngrok {
|
if ngrok {
|
||||||
@ -792,7 +793,7 @@ async fn shutdown_signal() {
|
|||||||
|
|
||||||
impl From<i32> for FinishReason {
|
impl From<i32> for FinishReason {
|
||||||
fn from(finish_reason: i32) -> Self {
|
fn from(finish_reason: i32) -> Self {
|
||||||
let finish_reason = text_generation_client::FinishReason::from_i32(finish_reason).unwrap();
|
let finish_reason = text_generation_client::FinishReason::try_from(finish_reason).unwrap();
|
||||||
match finish_reason {
|
match finish_reason {
|
||||||
text_generation_client::FinishReason::Length => FinishReason::Length,
|
text_generation_client::FinishReason::Length => FinishReason::Length,
|
||||||
text_generation_client::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
|
text_generation_client::FinishReason::EosToken => FinishReason::EndOfSequenceToken,
|
||||||
|
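This one-liner falls out of the prost 0.11 to 0.12 bump above: newer prost drops the generated `from_i32 -> Option<_>` helper in favor of a standard `TryFrom<i32>` impl on proto enums. A sketch of that shape (variant names assumed from the proto; prost's real error type differs):

// Sketch only: what a TryFrom<i32> conversion for a proto enum looks like,
// rejecting unknown discriminants instead of returning None.
#[derive(Clone, Copy, Debug, PartialEq)]
enum FinishReasonProto {
    Length = 0,
    EosToken = 1,
    StopSequence = 2,
}

impl TryFrom<i32> for FinishReasonProto {
    type Error = i32;

    fn try_from(value: i32) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(FinishReasonProto::Length),
            1 => Ok(FinishReasonProto::EosToken),
            2 => Ok(FinishReasonProto::StopSequence),
            other => Err(other),
        }
    }
}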
@@ -276,7 +276,7 @@ impl Validation {
            truncate: truncate.unwrap_or(self.max_input_length) as u32,
            parameters,
            stopping_parameters,
-            top_n_tokens: top_n_tokens,
+            top_n_tokens,
        })
    }
2
server/.gitignore
vendored
@@ -159,3 +159,5 @@ safetensors
flash-attention/
flash-attention-v2/
vllm/
+llm-awq/
+eetq/
@@ -1,6 +1,8 @@
include Makefile-flash-att
include Makefile-flash-att-v2
include Makefile-vllm
+include Makefile-awq
+include Makefile-eetq

unit-tests:
	pytest -s -vv -m "not private" tests
13
server/Makefile-awq
Normal file
@@ -0,0 +1,13 @@
awq_commit := f084f40bd996f3cf3a0633c1ad7d9d476c318aaa

awq:
	rm -rf llm-awq
	git clone https://github.com/mit-han-lab/llm-awq

build-awq: awq
	cd llm-awq/ && git fetch && git checkout $(awq_commit)
	cd llm-awq/awq/kernels && python setup.py build

install-awq: build-awq
	pip uninstall awq_inference_engine -y || true
	cd llm-awq/awq/kernels && python setup.py install
13
server/Makefile-eetq
Normal file
@@ -0,0 +1,13 @@
eetq_commit := 323827dd471458a84e9c840f614e4592b157a4b1

eetq:
	# Clone eetq
	pip install packaging
	git clone https://github.com/NetEase-FuXi/EETQ.git eetq

build-eetq: eetq
	cd eetq && git fetch && git checkout $(eetq_commit)
	cd eetq && python setup.py build

install-eetq: build-eetq
	cd eetq && python setup.py install
@@ -1,4 +1,4 @@
-flash_att_v2_commit := 4f285b354796fb17df8636485b9a04df3ebbb7dc
+flash_att_v2_commit := 601b4dc48dbe9d87c468daa2b4c0c8388b83753c

flash-attention-v2:
	# Clone flash attention
@@ -1,4 +1,4 @@
-vllm_commit := e86af624d059969b0fb07b075b1d338bf10c3365
+vllm_commit := 25dbff97d5a8f2ba331847237b458b2692e9ae78

vllm:
	# Clone vllm
594
server/poetry.lock
generated
@@ -323,19 +323,19 @@ files = [

[[package]]
name = "datasets"
-version = "2.14.4"
+version = "2.14.5"
description = "HuggingFace community-driven open-source library of datasets"
optional = true
python-versions = ">=3.8.0"
files = [
-    {file = "datasets-2.14.4-py3-none-any.whl", hash = "sha256:29336bd316a7d827ccd4da2236596279b20ca2ac78f64c04c9483da7cbc2459b"},
+    {file = "datasets-2.14.5-py3-none-any.whl", hash = "sha256:dd4155091034cba04d5a28711f2ed3944275ed15c5d0c5a2d0b6b9ea34a2bdfe"},
-    {file = "datasets-2.14.4.tar.gz", hash = "sha256:ef29c2b5841de488cd343cfc26ab979bff77efa4d2285af51f1ad7db5c46a83b"},
+    {file = "datasets-2.14.5.tar.gz", hash = "sha256:b738a86540ab8e1a7806c8a3790b67be0056318d0c5d5a58a1b0dbdd76c0f568"},
]

[package.dependencies]
aiohttp = "*"
dill = ">=0.3.0,<0.3.8"
-fsspec = {version = ">=2021.11.1", extras = ["http"]}
+fsspec = {version = ">=2023.1.0,<2023.9.0", extras = ["http"]}
huggingface-hub = ">=0.14.0,<1.0.0"
multiprocess = "*"
numpy = ">=1.17"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filelock"
|
name = "filelock"
|
||||||
version = "3.12.3"
|
version = "3.12.4"
|
||||||
description = "A platform independent file lock."
|
description = "A platform independent file lock."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
files = [
|
files = [
|
||||||
{file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"},
|
{file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"},
|
||||||
{file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"},
|
{file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
|
||||||
typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""}
|
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
|
docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"]
|
||||||
testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
|
testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"]
|
||||||
|
typing = ["typing-extensions (>=4.7.1)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "frozenlist"
|
name = "frozenlist"
|
||||||
@@ -582,148 +580,148 @@ testing = ["protobuf (>=4.21.9)"]

[[package]]
name = "grpcio"
-version = "1.57.0"
+version = "1.58.0"
description = "HTTP/2-based RPC framework"
optional = false
python-versions = ">=3.7"
files = [
-    {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"},
+    {file = "grpcio-1.58.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:3e6bebf1dfdbeb22afd95650e4f019219fef3ab86d3fca8ebade52e4bc39389a"},
-    {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"},
+    {file = "grpcio-1.58.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:cde11577d5b6fd73a00e6bfa3cf5f428f3f33c2d2878982369b5372bbc4acc60"},
-    {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"},
+    {file = "grpcio-1.58.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:a2d67ff99e70e86b2be46c1017ae40b4840d09467d5455b2708de6d4c127e143"},
-    {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"},
+    {file = "grpcio-1.58.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1ed979b273a81de36fc9c6716d9fb09dd3443efa18dcc8652501df11da9583e9"},
-    {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"},
+    {file = "grpcio-1.58.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:458899d2ebd55d5ca2350fd3826dfd8fcb11fe0f79828ae75e2b1e6051d50a29"},
-    {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"},
+    {file = "grpcio-1.58.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bc7ffef430b80345729ff0a6825e9d96ac87efe39216e87ac58c6c4ef400de93"},
-    {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"},
+    {file = "grpcio-1.58.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5b23d75e5173faa3d1296a7bedffb25afd2fddb607ef292dfc651490c7b53c3d"},
-    {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"},
+    {file = "grpcio-1.58.0-cp310-cp310-win32.whl", hash = "sha256:fad9295fe02455d4f158ad72c90ef8b4bcaadfdb5efb5795f7ab0786ad67dd58"},
-    {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"},
+    {file = "grpcio-1.58.0-cp310-cp310-win_amd64.whl", hash = "sha256:bc325fed4d074367bebd465a20763586e5e1ed5b943e9d8bc7c162b1f44fd602"},
-    {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"},
+    {file = "grpcio-1.58.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:652978551af02373a5a313e07bfef368f406b5929cf2d50fa7e4027f913dbdb4"},
-    {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"},
+    {file = "grpcio-1.58.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:9f13a171281ebb4d7b1ba9f06574bce2455dcd3f2f6d1fbe0fd0d84615c74045"},
-    {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"},
+    {file = "grpcio-1.58.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8774219e21b05f750eef8adc416e9431cf31b98f6ce9def288e4cea1548cbd22"},
-    {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"},
+    {file = "grpcio-1.58.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09206106848462763f7f273ca93d2d2d4d26cab475089e0de830bb76be04e9e8"},
-    {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"},
+    {file = "grpcio-1.58.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62831d5e251dd7561d9d9e83a0b8655084b2a1f8ea91e4bd6b3cedfefd32c9d2"},
-    {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"},
+    {file = "grpcio-1.58.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:212f38c6a156862098f6bdc9a79bf850760a751d259d8f8f249fc6d645105855"},
-    {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"},
+    {file = "grpcio-1.58.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4b12754af201bb993e6e2efd7812085ddaaef21d0a6f0ff128b97de1ef55aa4a"},
-    {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"},
+    {file = "grpcio-1.58.0-cp311-cp311-win32.whl", hash = "sha256:3886b4d56bd4afeac518dbc05933926198aa967a7d1d237a318e6fbc47141577"},
-    {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"},
+    {file = "grpcio-1.58.0-cp311-cp311-win_amd64.whl", hash = "sha256:002f228d197fea12797a14e152447044e14fb4fdb2eb5d6cfa496f29ddbf79ef"},
-    {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"},
+    {file = "grpcio-1.58.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:b5e8db0aff0a4819946215f156bd722b6f6c8320eb8419567ffc74850c9fd205"},
-    {file = "grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"},
+    {file = "grpcio-1.58.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:201e550b7e2ede113b63e718e7ece93cef5b0fbf3c45e8fe4541a5a4305acd15"},
-    {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"},
+    {file = "grpcio-1.58.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:d79b660681eb9bc66cc7cbf78d1b1b9e335ee56f6ea1755d34a31108b80bd3c8"},
-    {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"},
+    {file = "grpcio-1.58.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ef8d4a76d2c7d8065aba829f8d0bc0055495c998dce1964ca5b302d02514fb3"},
-    {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"},
+    {file = "grpcio-1.58.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6cba491c638c76d3dc6c191d9c75041ca5b8f5c6de4b8327ecdcab527f130bb4"},
-    {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"},
+    {file = "grpcio-1.58.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6801ff6652ecd2aae08ef994a3e49ff53de29e69e9cd0fd604a79ae4e545a95c"},
-    {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"},
+    {file = "grpcio-1.58.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:24edec346e69e672daf12b2c88e95c6f737f3792d08866101d8c5f34370c54fd"},
-    {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"},
+    {file = "grpcio-1.58.0-cp37-cp37m-win_amd64.whl", hash = "sha256:7e473a7abad9af48e3ab5f3b5d237d18208024d28ead65a459bd720401bd2f8f"},
-    {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"},
+    {file = "grpcio-1.58.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:4891bbb4bba58acd1d620759b3be11245bfe715eb67a4864c8937b855b7ed7fa"},
-    {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"},
+    {file = "grpcio-1.58.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:e9f995a8a421405958ff30599b4d0eec244f28edc760de82f0412c71c61763d2"},
-    {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"},
+    {file = "grpcio-1.58.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2f85f87e2f087d9f632c085b37440a3169fda9cdde80cb84057c2fc292f8cbdf"},
-    {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"},
+    {file = "grpcio-1.58.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb6b92036ff312d5b4182fa72e8735d17aceca74d0d908a7f08e375456f03e07"},
-    {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"},
+    {file = "grpcio-1.58.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d81c2b2b24c32139dd2536972f1060678c6b9fbd106842a9fcdecf07b233eccd"},
-    {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"},
+    {file = "grpcio-1.58.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:fbcecb6aedd5c1891db1d70efbfbdc126c986645b5dd616a045c07d6bd2dfa86"},
-    {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"},
+    {file = "grpcio-1.58.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:92ae871a902cf19833328bd6498ec007b265aabf2fda845ab5bd10abcaf4c8c6"},
-    {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"},
+    {file = "grpcio-1.58.0-cp38-cp38-win32.whl", hash = "sha256:dc72e04620d49d3007771c0e0348deb23ca341c0245d610605dddb4ac65a37cb"},
-    {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"},
+    {file = "grpcio-1.58.0-cp38-cp38-win_amd64.whl", hash = "sha256:1c1c5238c6072470c7f1614bf7c774ffde6b346a100521de9ce791d1e4453afe"},
-    {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"},
+    {file = "grpcio-1.58.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fe643af248442221db027da43ed43e53b73e11f40c9043738de9a2b4b6ca7697"},
-    {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"},
+    {file = "grpcio-1.58.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:128eb1f8e70676d05b1b0c8e6600320fc222b3f8c985a92224248b1367122188"},
-    {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"},
+    {file = "grpcio-1.58.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:039003a5e0ae7d41c86c768ef8b3ee2c558aa0a23cf04bf3c23567f37befa092"},
-    {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"},
+    {file = "grpcio-1.58.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f061722cad3f9aabb3fbb27f3484ec9d4667b7328d1a7800c3c691a98f16bb0"},
-    {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"},
+    {file = "grpcio-1.58.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0af11938acf8cd4cf815c46156bcde36fa5850518120920d52620cc3ec1830"},
-    {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"},
+    {file = "grpcio-1.58.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d4cef77ad2fed42b1ba9143465856d7e737279854e444925d5ba45fc1f3ba727"},
-    {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"},
+    {file = "grpcio-1.58.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:24765a627eb4d9288ace32d5104161c3654128fe27f2808ecd6e9b0cfa7fc8b9"},
-    {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"},
+    {file = "grpcio-1.58.0-cp39-cp39-win32.whl", hash = "sha256:f0241f7eb0d2303a545136c59bc565a35c4fc3b924ccbd69cb482f4828d6f31c"},
-    {file = "grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"},
+    {file = "grpcio-1.58.0-cp39-cp39-win_amd64.whl", hash = "sha256:dcfba7befe3a55dab6fe1eb7fc9359dc0c7f7272b30a70ae0af5d5b063842f28"},
-    {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"},
+    {file = "grpcio-1.58.0.tar.gz", hash = "sha256:532410c51ccd851b706d1fbc00a87be0f5312bd6f8e5dbf89d4e99c7f79d7499"},
]

[package.extras]
-protobuf = ["grpcio-tools (>=1.57.0)"]
+protobuf = ["grpcio-tools (>=1.58.0)"]

[[package]]
name = "grpcio-reflection"
-version = "1.57.0"
+version = "1.58.0"
description = "Standard Protobuf Reflection Service for gRPC"
optional = false
python-versions = ">=3.6"
files = [
-    {file = "grpcio-reflection-1.57.0.tar.gz", hash = "sha256:8f63a18729cba995a172f8325235f5094cb066febec75f9a3b1b2e28328aa166"},
+    {file = "grpcio-reflection-1.58.0.tar.gz", hash = "sha256:e6048a758d17b6ca1705258e7ee5d926d2960a95ae08ba0929dd233e505acd3d"},
-    {file = "grpcio_reflection-1.57.0-py3-none-any.whl", hash = "sha256:d7deb8587f9d0095fb5d367c2aa5ce1380e3f23b0f8bca6c00bc404c5429cb6a"},
+    {file = "grpcio_reflection-1.58.0-py3-none-any.whl", hash = "sha256:fa18885d8a09cef02c9a6b1d17dfed0279f1f401b06bd1f75958b78ebf1b5c0c"},
]

[package.dependencies]
-grpcio = ">=1.57.0"
+grpcio = ">=1.58.0"
protobuf = ">=4.21.6"

[[package]]
name = "grpcio-status"
-version = "1.57.0"
+version = "1.58.0"
description = "Status proto mapping for gRPC"
optional = false
python-versions = ">=3.6"
files = [
-    {file = "grpcio-status-1.57.0.tar.gz", hash = "sha256:b098da99df1eebe58337f8f78e50df990273ccacc1226fddeb47c590e3df9e02"},
+    {file = "grpcio-status-1.58.0.tar.gz", hash = "sha256:0b42e70c0405a66a82d9e9867fa255fe59e618964a6099b20568c31dd9099766"},
-    {file = "grpcio_status-1.57.0-py3-none-any.whl", hash = "sha256:15d6af055914ebbc4ed17e55ebfb8e6bb17a45a57fea32e6af19978fb7844690"},
+    {file = "grpcio_status-1.58.0-py3-none-any.whl", hash = "sha256:36d46072b71a00147709ebce49344ac59b4b8960942acf0f813a8a7d6c1c28e0"},
]

[package.dependencies]
googleapis-common-protos = ">=1.5.5"
-grpcio = ">=1.57.0"
+grpcio = ">=1.58.0"
protobuf = ">=4.21.6"

[[package]]
name = "grpcio-tools"
-version = "1.57.0"
+version = "1.58.0"
description = "Protobuf code generator for gRPC"
optional = false
python-versions = ">=3.7"
files = [
-    {file = "grpcio-tools-1.57.0.tar.gz", hash = "sha256:2f16130d869ce27ecd623194547b649dd657333ec7e8644cc571c645781a9b85"},
+    {file = "grpcio-tools-1.58.0.tar.gz", hash = "sha256:6f4d80ceb591e31ca4dceec747dbe56132e1392a0a9bb1c8fe001d1b5cac898a"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:4fb8a8468031f858381a576078924af364a08833d8f8f3237018252c4573a802"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:60c874908f3b40f32f1bb0221f7b3ab65ecb53a4d0a9f0a394f031f1b292c177"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:35bf0dad8a3562043345236c26d0053a856fb06c04d7da652f2ded914e508ae7"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:1852e798f31e5437ca7b37abc910e028b34732fb19364862cedb87b1dab66fad"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:ec9aab2fb6783c7fc54bc28f58eb75f1ca77594e6b0fd5e5e7a8114a95169fe0"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:149fb48f53cb691a6328f68bed8e4036c730f7106b7f98e92c2c0403f0b9e93c"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0cf5fc0a1c23f8ea34b408b72fb0e90eec0f404ad4dba98e8f6da3c9ce34e2ed"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba3d383e5ca93826038b70f326fce8e8d12dd9b2f64d363a3d612f7475f12dd2"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26e69d08a515554e0cfe1ec4d31568836f4b17f0ff82294f957f629388629eb9"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6997511e9d2979f7a2389479682dbb06823f21a904e8fb0a5c6baaf1b4b4a863"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c39a3656576b6fdaaf28abe0467f7a7231df4230c1bee132322dbc3209419e7f"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8de0b701da479643f71fad71fe66885cddd89441ae16e2c724939b47742dc72e"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f64f8ab22d27d4a5693310748d35a696061c3b5c7b8c4fb4ab3b4bc1068b6b56"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:43cc23908b63fcaefe690b10f68a2d8652c994b5b36ab77d2271d9608c895320"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-win32.whl", hash = "sha256:d2a134756f4db34759a5cc7f7e43f7eb87540b68d1cca62925593c6fb93924f7"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-win32.whl", hash = "sha256:2c2221123d010dc6231799e63a37f2f4786bf614ef65b23009c387cd20d8b193"},
-    {file = "grpcio_tools-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:9a3d60fb8d46ede26c1907c146561b3a9caa20a7aff961bc661ef8226f85a2e9"},
+    {file = "grpcio_tools-1.58.0-cp310-cp310-win_amd64.whl", hash = "sha256:df2788736bdf58abe7b0e4d6b1ff806f7686c98c5ad900da312252e3322d91c4"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:aac98ecad8f7bd4301855669d42a5d97ef7bb34bec2b1e74c7a0641d47e313cf"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:b6ea5578712cdb29b0ff60bfc6405bf0e8d681b9c71d106dd1cda54fe7fe4e55"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cdd020cb68b51462983b7c2dfbc3eb6ede032b8bf438d4554df0c3f08ce35c76"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:c29880f491581c83181c0a84a4d11402af2b13166a5266f64e246adf1da7aa66"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:f54081b08419a39221cd646363b5708857c696b3ad4784f1dcf310891e33a5f7"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:32d51e933c3565414dd0835f930bb28a1cdeba435d9d2c87fa3cf8b1d284db3c"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed85a0291fff45b67f2557fe7f117d3bc7af8b54b8619d27bf374b5c8b7e3ca2"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ad9d77f25514584b1ddc981d70c9e50dfcfc388aa5ba943eee67520c5267ed9"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e868cd6feb3ef07d4b35be104fe1fd0657db05259ff8f8ec5e08f4f89ca1191d"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4882382631e6352819059278a5c878ce0b067008dd490911d16d5616e8a36d85"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:dfb6f6120587b8e228a3cae5ee4985b5bdc18501bad05c49df61965dfc9d70a9"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d84091a189d848d94645b7c48b61734c12ec03b0d46e5fc0049343a26989ac5c"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a7ad7f328e28fc97c356d0f10fb10d8b5151bb65aa7cf14bf8084513f0b7306"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:85ac28a9621e9b92a3fc416288c4ce45542db0b4c31b3e23031dd8e0a0ec5590"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-win32.whl", hash = "sha256:9867f2817b1a0c93c523f89ac6c9d8625548af4620a7ce438bf5a76e23327284"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-win32.whl", hash = "sha256:7371d8ea80234b29affec145e25569523f549520ed7e53b2aa92bed412cdecfd"},
-    {file = "grpcio_tools-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:1f9e917a9f18087f6c14b4d4508fb94fca5c2f96852363a89232fb9b2124ac1f"},
+    {file = "grpcio_tools-1.58.0-cp311-cp311-win_amd64.whl", hash = "sha256:6997df6e7c5cf4d3ddc764240c1ff6a04b45d70ec28913b38fbc6396ef743e12"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:9f2aefa8a37bd2c4db1a3f1aca11377e2766214520fb70e67071f4ff8d8b0fa5"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:ac65b8d6e3acaf88b815edf9af88ff844b6600ff3d2591c05ba4f655b45d5fb4"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:850cbda0ec5d24c39e7215ede410276040692ca45d105fbbeada407fa03f0ac0"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:88e8191d0dd789bebf42533808728f5ce75d2c51e2a72bdf20abe5b5e3fbec42"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:6fa52972c9647876ea35f6dc2b51002a74ed900ec7894586cbb2fe76f64f99de"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:a3dbece2a121761499a659b799979d4b738586d1065439053de553773eee11ca"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0eea89d7542719594e50e2283f51a072978b953e8b3e9fd7c59a2c762d4c1"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1086fe240c4c879b9721952b47d46996deb283c2d9355a8dc24a804811aacf70"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3da5240211252fc70a6451fe00c143e2ab2f7bfc2445695ad2ed056b8e48d96"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7ae3dca059d5b358dd03fb63277428fa7d771605d4074a019138dd38d70719a"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a0256f8786ac9e4db618a1aa492bb3472569a0946fd3ee862ffe23196323da55"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:3f8904ac7fc3da2e874f00b3a986e8b7e004f499344a8e7eb213c26dfb025041"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c026bdf5c1366ce88b7bbe2d8207374d675afd3fd911f60752103de3da4a41d2"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:aadbd8393ae332e49731adb31e741f2e689989150569b7acc939f5ea43124e2d"},
-    {file = "grpcio_tools-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9053c2f655589545be08b9d6a673e92970173a4bf11a4b9f18cd6e9af626b587"},
+    {file = "grpcio_tools-1.58.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1cb6e24194786687d4f23c64de1f0ce553af51de22746911bc37340f85f9783e"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:81ec4dbb696e095057b2528d11a8da04be6bbe2b967fa07d4ea9ba6354338cbf"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:6ec43909095c630df3e479e77469bdad367067431f4af602f6ccb978a3b78afd"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:495e2946406963e0b9f063f76d5af0f2a19517dac2b367b5b044432ac9194296"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:4be49ed320b0ebcbc21d19ef555fbf229c1c452105522b728e1171ee2052078e"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:7b46fc6aa8eb7edd18cafcd21fd98703cb6c09e46b507de335fca7f0161dfccb"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:28eefebddec3d3adf19baca78f8b82a2287d358e1b1575ae018cdca8eacc6269"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb81ff861692111fa81bd85f64584e624cb4013bd66fbce8a209b8893f5ce398"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ef8c696e9d78676cc3f583a92bbbf2c84e94e350f7ad22f150a52559f4599d1"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a42dc220eb5305f470855c9284f4c8e85ae59d6d742cd07946b0cbe5e9ca186"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9aeb5949e46558d21c51fd3ec3eeecc59c94dbca76c67c0a80d3da6b7437930c"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:90d10d9038ba46a595a223a34f136c9230e3d6d7abc2433dbf0e1c31939d3a8b"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f7144aad9396d35fb1b80429600a970b559c2ad4d07020eeb180fe83cea2bee"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5bc3e6d338aefb052e19cedabe00452be46d0c10a4ed29ee77abb00402e438fe"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4ee26e9253a721fff355737649678535f76cf5d642aa3ac0cd937832559b90af"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-win32.whl", hash = "sha256:34b36217b17b5bea674a414229913e1fd80ede328be51e1b531fcc62abd393b0"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-win32.whl", hash = "sha256:343f572312039059a8797d6e29a7fc62196e73131ab01755660a9d48202267c1"},
-    {file = "grpcio_tools-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbde4004a0688400036342ff73e3706e8940483e2871547b1354d59e93a38277"},
+    {file = "grpcio_tools-1.58.0-cp38-cp38-win_amd64.whl", hash = "sha256:cd7acfbb43b7338a78cf4a67528d05530d574d92b7c829d185b78dfc451d158f"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:784574709b9690dc28696617ea69352e2132352fdfc9bc89afa8e39f99ae538e"},
+    {file = "grpcio_tools-1.58.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:46628247fbce86d18232eead24bd22ed0826c79f3fe2fc2fbdbde45971361049"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:85ac4e62eb44428cde025fd9ab7554002315fc7880f791c553fc5a0015cc9931"},
+    {file = "grpcio_tools-1.58.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:51587842a54e025a3d0d37afcf4ef2b7ac1def9a5d17448665cb424b53d6c287"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:dc771d4db5701f280957bbcee91745e0686d00ed1c6aa7e05ba30a58b02d70a1"},
+    {file = "grpcio_tools-1.58.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:a062ae3072a2a39a3c057f4d68b57b021f1dd2956cd09aab39709f6af494e1de"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3ac06703c412f8167a9062eaf6099409967e33bf98fa5b02be4b4689b6bdf39"},
+    {file = "grpcio_tools-1.58.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eec3c93a08df11c80ef1c29a616bcbb0d83dbc6ea41b48306fcacc720416dfa7"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02d78c034109f46032c7217260066d49d41e6bcaf588fa28fa40fe2f83445347"},
+    {file = "grpcio_tools-1.58.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b63f823ac991ff77104da614d2a2485a59d37d57830eb2e387a6e2a3edc7fa2b"},
-    {file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2db25f15ed44327f2e02d0c4fe741ac966f9500e407047d8a7c7fccf2df65616"},
|
{file = "grpcio_tools-1.58.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:579c11a9f198847ed48dbc4f211c67fe96a73320b87c81f01b044b72e24a7d77"},
|
||||||
{file = "grpcio_tools-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2b417c97936d94874a3ce7ed8deab910f2233e3612134507cfee4af8735c38a6"},
|
{file = "grpcio_tools-1.58.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6ca2fc1dd8049d417a5034d944c9df05cee76f855b3e431627ab4292e7c01c47"},
|
||||||
{file = "grpcio_tools-1.57.0-cp39-cp39-win32.whl", hash = "sha256:f717cce5093e6b6049d9ea6d12fdf3658efdb1a80772f7737db1f8510b876df6"},
|
{file = "grpcio_tools-1.58.0-cp39-cp39-win32.whl", hash = "sha256:453023120114c35d3d9d6717ea0820e5d5c140f51f9d0b621de4397ff854471b"},
|
||||||
{file = "grpcio_tools-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:1c0e8a1a32973a5d59fbcc19232f925e5c48116e9411f788033a31c5ca5130b4"},
|
{file = "grpcio_tools-1.58.0-cp39-cp39-win_amd64.whl", hash = "sha256:b6c896f1df99c35cf062d4803c15663ff00a33ff09add28baa6e475cf6b5e258"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
grpcio = ">=1.57.0"
|
grpcio = ">=1.58.0"
|
||||||
protobuf = ">=4.21.6,<5.0dev"
|
protobuf = ">=4.21.6,<5.0dev"
|
||||||
setuptools = "*"
|
setuptools = "*"
|
||||||
|
|
||||||
@@ -871,6 +869,16 @@ files = [
     {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
     {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
+    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
     {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -1051,36 +1059,43 @@ test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"]

 [[package]]
 name = "numpy"
-version = "1.25.2"
+version = "1.26.0"
 description = "Fundamental package for array computing in Python"
 optional = false
-python-versions = ">=3.9"
+python-versions = "<3.13,>=3.9"
 files = [
-    {file = "numpy-1.25.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:db3ccc4e37a6873045580d413fe79b68e47a681af8db2e046f1dacfa11f86eb3"},
+    {file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"},
-    {file = "numpy-1.25.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:90319e4f002795ccfc9050110bbbaa16c944b1c37c0baeea43c5fb881693ae1f"},
+    {file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"},
-    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfe4a913e29b418d096e696ddd422d8a5d13ffba4ea91f9f60440a3b759b0187"},
+    {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"},
-    {file = "numpy-1.25.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08f2e037bba04e707eebf4bc934f1972a315c883a9e0ebfa8a7756eabf9e357"},
+    {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"},
-    {file = "numpy-1.25.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bec1e7213c7cb00d67093247f8c4db156fd03075f49876957dca4711306d39c9"},
+    {file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"},
-    {file = "numpy-1.25.2-cp310-cp310-win32.whl", hash = "sha256:7dc869c0c75988e1c693d0e2d5b26034644399dd929bc049db55395b1379e044"},
+    {file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = "sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"},
-    {file = "numpy-1.25.2-cp310-cp310-win_amd64.whl", hash = "sha256:834b386f2b8210dca38c71a6e0f4fd6922f7d3fcff935dbe3a570945acb1b545"},
+    {file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"},
-    {file = "numpy-1.25.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5462d19336db4560041517dbb7759c21d181a67cb01b36ca109b2ae37d32418"},
+    {file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"},
-    {file = "numpy-1.25.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c5652ea24d33585ea39eb6a6a15dac87a1206a692719ff45d53c5282e66d4a8f"},
+    {file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"},
-    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d60fbae8e0019865fc4784745814cff1c421df5afee233db6d88ab4f14655a2"},
+    {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"},
-    {file = "numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e7f0f7f6d0eee8364b9a6304c2845b9c491ac706048c7e8cf47b83123b8dbf"},
+    {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"},
-    {file = "numpy-1.25.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bb33d5a1cf360304754913a350edda36d5b8c5331a8237268c48f91253c3a364"},
+    {file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"},
-    {file = "numpy-1.25.2-cp311-cp311-win32.whl", hash = "sha256:5883c06bb92f2e6c8181df7b39971a5fb436288db58b5a1c3967702d4278691d"},
+    {file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"},
-    {file = "numpy-1.25.2-cp311-cp311-win_amd64.whl", hash = "sha256:5c97325a0ba6f9d041feb9390924614b60b99209a71a69c876f71052521d42a4"},
+    {file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"},
-    {file = "numpy-1.25.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b79e513d7aac42ae918db3ad1341a015488530d0bb2a6abcbdd10a3a829ccfd3"},
+    {file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"},
-    {file = "numpy-1.25.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:eb942bfb6f84df5ce05dbf4b46673ffed0d3da59f13635ea9b926af3deb76926"},
+    {file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"},
-    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e0746410e73384e70d286f93abf2520035250aad8c5714240b0492a7302fdca"},
+    {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"},
-    {file = "numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295"},
+    {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"},
-    {file = "numpy-1.25.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8b77775f4b7df768967a7c8b3567e309f617dd5e99aeb886fa14dc1a0791141f"},
+    {file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"},
-    {file = "numpy-1.25.2-cp39-cp39-win32.whl", hash = "sha256:2792d23d62ec51e50ce4d4b7d73de8f67a2fd3ea710dcbc8563a51a03fb07b01"},
+    {file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"},
-    {file = "numpy-1.25.2-cp39-cp39-win_amd64.whl", hash = "sha256:76b4115d42a7dfc5d485d358728cdd8719be33cc5ec6ec08632a5d6fca2ed380"},
+    {file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a1329e26f46230bf77b02cc19e900db9b52f398d6722ca853349a782d4cff55"},
+    {file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c3abc71e8b6edba80a01a52e66d83c5d14433cbcd26a40c329ec7ed09f37901"},
+    {file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"},
-    {file = "numpy-1.25.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:1b9735c27cea5d995496f46a8b1cd7b408b3f34b6d50459d9ac8fe3a20cc17bf"},
+    {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"},
-    {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"},
+    {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"},
+    {file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"},
+    {file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"},
+    {file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"},
+    {file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"},
+    {file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"},
 ]

 [[package]]
@@ -1250,70 +1265,71 @@ files = [

 [[package]]
 name = "pandas"
-version = "2.0.3"
+version = "2.1.1"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = true
-python-versions = ">=3.8"
+python-versions = ">=3.9"
 files = [
-    {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"},
+    {file = "pandas-2.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:58d997dbee0d4b64f3cb881a24f918b5f25dd64ddf31f467bb9b67ae4c63a1e4"},
-    {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"},
+    {file = "pandas-2.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02304e11582c5d090e5a52aec726f31fe3f42895d6bfc1f28738f9b64b6f0614"},
-    {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"},
+    {file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffa8f0966de2c22de408d0e322db2faed6f6e74265aa0856f3824813cf124363"},
-    {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"},
+    {file = "pandas-2.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1f84c144dee086fe4f04a472b5cd51e680f061adf75c1ae4fc3a9275560f8f4"},
-    {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"},
+    {file = "pandas-2.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:75ce97667d06d69396d72be074f0556698c7f662029322027c226fd7a26965cb"},
-    {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"},
+    {file = "pandas-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:4c3f32fd7c4dccd035f71734df39231ac1a6ff95e8bdab8d891167197b7018d2"},
-    {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"},
+    {file = "pandas-2.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e2959720b70e106bb1d8b6eadd8ecd7c8e99ccdbe03ee03260877184bb2877d"},
-    {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"},
+    {file = "pandas-2.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:25e8474a8eb258e391e30c288eecec565bfed3e026f312b0cbd709a63906b6f8"},
-    {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"},
+    {file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8bd1685556f3374520466998929bade3076aeae77c3e67ada5ed2b90b4de7f0"},
-    {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"},
+    {file = "pandas-2.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc3657869c7902810f32bd072f0740487f9e030c1a3ab03e0af093db35a9d14e"},
-    {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"},
+    {file = "pandas-2.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:05674536bd477af36aa2effd4ec8f71b92234ce0cc174de34fd21e2ee99adbc2"},
-    {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"},
+    {file = "pandas-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:b407381258a667df49d58a1b637be33e514b07f9285feb27769cedb3ab3d0b3a"},
-    {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"},
+    {file = "pandas-2.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c747793c4e9dcece7bb20156179529898abf505fe32cb40c4052107a3c620b49"},
-    {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"},
+    {file = "pandas-2.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3bcad1e6fb34b727b016775bea407311f7721db87e5b409e6542f4546a4951ea"},
-    {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"},
+    {file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5ec7740f9ccb90aec64edd71434711f58ee0ea7f5ed4ac48be11cfa9abf7317"},
-    {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"},
+    {file = "pandas-2.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29deb61de5a8a93bdd033df328441a79fcf8dd3c12d5ed0b41a395eef9cd76f0"},
-    {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"},
+    {file = "pandas-2.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4f99bebf19b7e03cf80a4e770a3e65eee9dd4e2679039f542d7c1ace7b7b1daa"},
-    {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"},
+    {file = "pandas-2.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:84e7e910096416adec68075dc87b986ff202920fb8704e6d9c8c9897fe7332d6"},
-    {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"},
+    {file = "pandas-2.1.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:366da7b0e540d1b908886d4feb3d951f2f1e572e655c1160f5fde28ad4abb750"},
-    {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"},
+    {file = "pandas-2.1.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9e50e72b667415a816ac27dfcfe686dc5a0b02202e06196b943d54c4f9c7693e"},
-    {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"},
+    {file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc1ab6a25da197f03ebe6d8fa17273126120874386b4ac11c1d687df288542dd"},
-    {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"},
+    {file = "pandas-2.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0dbfea0dd3901ad4ce2306575c54348d98499c95be01b8d885a2737fe4d7a98"},
-    {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"},
+    {file = "pandas-2.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0489b0e6aa3d907e909aef92975edae89b1ee1654db5eafb9be633b0124abe97"},
-    {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"},
+    {file = "pandas-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:4cdb0fab0400c2cb46dafcf1a0fe084c8bb2480a1fa8d81e19d15e12e6d4ded2"},
-    {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"},
+    {file = "pandas-2.1.1.tar.gz", hash = "sha256:fecb198dc389429be557cde50a2d46da8434a17fe37d7d41ff102e3987fd947b"},
 ]

 [package.dependencies]
 numpy = [
-    {version = ">=1.20.3", markers = "python_version < \"3.10\""},
+    {version = ">=1.22.4", markers = "python_version < \"3.11\""},
-    {version = ">=1.21.0", markers = "python_version >= \"3.10\""},
+    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
-    {version = ">=1.23.2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
 tzdata = ">=2022.1"

 [package.extras]
-all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"]
+all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
-aws = ["s3fs (>=2021.08.0)"]
+aws = ["s3fs (>=2022.05.0)"]
-clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"]
+clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
-compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"]
+compression = ["zstandard (>=0.17.0)"]
-computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"]
+computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
-excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"]
+consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
 feather = ["pyarrow (>=7.0.0)"]
-fss = ["fsspec (>=2021.07.0)"]
+fss = ["fsspec (>=2022.05.0)"]
-gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"]
+gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
-hdf5 = ["tables (>=3.6.1)"]
+hdf5 = ["tables (>=3.7.0)"]
-html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"]
+html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
-mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"]
+mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
-output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
 parquet = ["pyarrow (>=7.0.0)"]
-performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"]
+performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
 plot = ["matplotlib (>=3.6.1)"]
-postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"]
+postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
-spss = ["pyreadstat (>=1.1.2)"]
+spss = ["pyreadstat (>=1.1.5)"]
-sql-other = ["SQLAlchemy (>=1.4.16)"]
+sql-other = ["SQLAlchemy (>=1.4.36)"]
-test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
+test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"]
-xml = ["lxml (>=4.6.3)"]
+xml = ["lxml (>=4.8.0)"]

 [[package]]
 name = "peft"
@@ -1344,67 +1360,65 @@ test = ["black (>=22.0,<23.0)", "datasets", "diffusers", "hf-doc-builder", "para

 [[package]]
 name = "pillow"
-version = "10.0.0"
+version = "10.0.1"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "Pillow-10.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f62406a884ae75fb2f818694469519fb685cc7eaff05d3451a9ebe55c646891"},
+    {file = "Pillow-10.0.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:8f06be50669087250f319b706decf69ca71fdecd829091a37cc89398ca4dc17a"},
-    {file = "Pillow-10.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d5db32e2a6ccbb3d34d87c87b432959e0db29755727afb37290e10f6e8e62614"},
+    {file = "Pillow-10.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:50bd5f1ebafe9362ad622072a1d2f5850ecfa44303531ff14353a4059113b12d"},
-    {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edf4392b77bdc81f36e92d3a07a5cd072f90253197f4a52a55a8cec48a12483b"},
+    {file = "Pillow-10.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6a90167bcca1216606223a05e2cf991bb25b14695c518bc65639463d7db722d"},
-    {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:520f2a520dc040512699f20fa1c363eed506e94248d71f85412b625026f6142c"},
+    {file = "Pillow-10.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f11c9102c56ffb9ca87134bd025a43d2aba3f1155f508eff88f694b33a9c6d19"},
-    {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:8c11160913e3dd06c8ffdb5f233a4f254cb449f4dfc0f8f4549eda9e542c93d1"},
+    {file = "Pillow-10.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:186f7e04248103482ea6354af6d5bcedb62941ee08f7f788a1c7707bc720c66f"},
-    {file = "Pillow-10.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a74ba0c356aaa3bb8e3eb79606a87669e7ec6444be352870623025d75a14a2bf"},
+    {file = "Pillow-10.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:0462b1496505a3462d0f35dc1c4d7b54069747d65d00ef48e736acda2c8cbdff"},
-    {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5d0dae4cfd56969d23d94dc8e89fb6a217be461c69090768227beb8ed28c0a3"},
+    {file = "Pillow-10.0.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d889b53ae2f030f756e61a7bff13684dcd77e9af8b10c6048fb2c559d6ed6eaf"},
-    {file = "Pillow-10.0.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22c10cc517668d44b211717fd9775799ccec4124b9a7f7b3635fc5386e584992"},
+    {file = "Pillow-10.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:552912dbca585b74d75279a7570dd29fa43b6d93594abb494ebb31ac19ace6bd"},
-    {file = "Pillow-10.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:dffe31a7f47b603318c609f378ebcd57f1554a3a6a8effbc59c3c69f804296de"},
+    {file = "Pillow-10.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:787bb0169d2385a798888e1122c980c6eff26bf941a8ea79747d35d8f9210ca0"},
-    {file = "Pillow-10.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:9fb218c8a12e51d7ead2a7c9e101a04982237d4855716af2e9499306728fb485"},
+    {file = "Pillow-10.0.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:fd2a5403a75b54661182b75ec6132437a181209b901446ee5724b589af8edef1"},
-    {file = "Pillow-10.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d35e3c8d9b1268cbf5d3670285feb3528f6680420eafe35cccc686b73c1e330f"},
+    {file = "Pillow-10.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2d7e91b4379f7a76b31c2dda84ab9e20c6220488e50f7822e59dac36b0cd92b1"},
-    {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ed64f9ca2f0a95411e88a4efbd7a29e5ce2cea36072c53dd9d26d9c76f753b3"},
+    {file = "Pillow-10.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19e9adb3f22d4c416e7cd79b01375b17159d6990003633ff1d8377e21b7f1b21"},
-    {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b6eb5502f45a60a3f411c63187db83a3d3107887ad0d036c13ce836f8a36f1d"},
+    {file = "Pillow-10.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93139acd8109edcdeffd85e3af8ae7d88b258b3a1e13a038f542b79b6d255c54"},
-    {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:c1fbe7621c167ecaa38ad29643d77a9ce7311583761abf7836e1510c580bf3dd"},
+    {file = "Pillow-10.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:92a23b0431941a33242b1f0ce6c88a952e09feeea9af4e8be48236a68ffe2205"},
-    {file = "Pillow-10.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cd25d2a9d2b36fcb318882481367956d2cf91329f6892fe5d385c346c0649629"},
+    {file = "Pillow-10.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:cbe68deb8580462ca0d9eb56a81912f59eb4542e1ef8f987405e35a0179f4ea2"},
-    {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3b08d4cc24f471b2c8ca24ec060abf4bebc6b144cb89cba638c720546b1cf538"},
+    {file = "Pillow-10.0.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:522ff4ac3aaf839242c6f4e5b406634bfea002469656ae8358644fc6c4856a3b"},
-    {file = "Pillow-10.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d737a602fbd82afd892ca746392401b634e278cb65d55c4b7a8f48e9ef8d008d"},
+    {file = "Pillow-10.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:84efb46e8d881bb06b35d1d541aa87f574b58e87f781cbba8d200daa835b42e1"},
-    {file = "Pillow-10.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3a82c40d706d9aa9734289740ce26460a11aeec2d9c79b7af87bb35f0073c12f"},
+    {file = "Pillow-10.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:898f1d306298ff40dc1b9ca24824f0488f6f039bc0e25cfb549d3195ffa17088"},
-    {file = "Pillow-10.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:bc2ec7c7b5d66b8ec9ce9f720dbb5fa4bace0f545acd34870eff4a369b44bf37"},
+    {file = "Pillow-10.0.1-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:bcf1207e2f2385a576832af02702de104be71301c2696d0012b1b93fe34aaa5b"},
-    {file = "Pillow-10.0.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:d80cf684b541685fccdd84c485b31ce73fc5c9b5d7523bf1394ce134a60c6883"},
+    {file = "Pillow-10.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d6c9049c6274c1bb565021367431ad04481ebb54872edecfcd6088d27edd6ed"},
-    {file = "Pillow-10.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76de421f9c326da8f43d690110f0e79fe3ad1e54be811545d7d91898b4c8493e"},
+    {file = "Pillow-10.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28444cb6ad49726127d6b340217f0627abc8732f1194fd5352dec5e6a0105635"},
-    {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ff539a12457809666fef6624684c008e00ff6bf455b4b89fd00a140eecd640"},
+    {file = "Pillow-10.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de596695a75496deb3b499c8c4f8e60376e0516e1a774e7bc046f0f48cd620ad"},
-    {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce543ed15570eedbb85df19b0a1a7314a9c8141a36ce089c0a894adbfccb4568"},
+    {file = "Pillow-10.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:2872f2d7846cf39b3dbff64bc1104cc48c76145854256451d33c5faa55c04d1a"},
-    {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:685ac03cc4ed5ebc15ad5c23bc555d68a87777586d970c2c3e216619a5476223"},
+    {file = "Pillow-10.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4ce90f8a24e1c15465048959f1e94309dfef93af272633e8f37361b824532e91"},
-    {file = "Pillow-10.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d72e2ecc68a942e8cf9739619b7f408cc7b272b279b56b2c83c6123fcfa5cdff"},
+    {file = "Pillow-10.0.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ee7810cf7c83fa227ba9125de6084e5e8b08c59038a7b2c9045ef4dde61663b4"},
-    {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d50b6aec14bc737742ca96e85d6d0a5f9bfbded018264b3b70ff9d8c33485551"},
+    {file = "Pillow-10.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b1be1c872b9b5fcc229adeadbeb51422a9633abd847c0ff87dc4ef9bb184ae08"},
-    {file = "Pillow-10.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:00e65f5e822decd501e374b0650146063fbb30a7264b4d2744bdd7b913e0cab5"},
+    {file = "Pillow-10.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:98533fd7fa764e5f85eebe56c8e4094db912ccbe6fbf3a58778d543cadd0db08"},
-    {file = "Pillow-10.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:f31f9fdbfecb042d046f9d91270a0ba28368a723302786c0009ee9b9f1f60199"},
+    {file = "Pillow-10.0.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:764d2c0daf9c4d40ad12fbc0abd5da3af7f8aa11daf87e4fa1b834000f4b6b0a"},
-    {file = "Pillow-10.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:1ce91b6ec08d866b14413d3f0bbdea7e24dfdc8e59f562bb77bc3fe60b6144ca"},
+    {file = "Pillow-10.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fcb59711009b0168d6ee0bd8fb5eb259c4ab1717b2f538bbf36bacf207ef7a68"},
-    {file = "Pillow-10.0.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:349930d6e9c685c089284b013478d6f76e3a534e36ddfa912cde493f235372f3"},
+    {file = "Pillow-10.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:697a06bdcedd473b35e50a7e7506b1d8ceb832dc238a336bd6f4f5aa91a4b500"},
-    {file = "Pillow-10.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3a684105f7c32488f7153905a4e3015a3b6c7182e106fe3c37fbb5ef3e6994c3"},
+    {file = "Pillow-10.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f665d1e6474af9f9da5e86c2a3a2d2d6204e04d5af9c06b9d42afa6ebde3f21"},
-    {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b4f69b3700201b80bb82c3a97d5e9254084f6dd5fb5b16fc1a7b974260f89f43"},
+    {file = "Pillow-10.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:2fa6dd2661838c66f1a5473f3b49ab610c98a128fc08afbe81b91a1f0bf8c51d"},
-    {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f07ea8d2f827d7d2a49ecf1639ec02d75ffd1b88dcc5b3a61bbb37a8759ad8d"},
+    {file = "Pillow-10.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:3a04359f308ebee571a3127fdb1bd01f88ba6f6fb6d087f8dd2e0d9bff43f2a7"},
-    {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:040586f7d37b34547153fa383f7f9aed68b738992380ac911447bb78f2abe530"},
+    {file = "Pillow-10.0.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:723bd25051454cea9990203405fa6b74e043ea76d4968166dfd2569b0210886a"},
-    {file = "Pillow-10.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f88a0b92277de8e3ca715a0d79d68dc82807457dae3ab8699c758f07c20b3c51"},
+    {file = "Pillow-10.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:71671503e3015da1b50bd18951e2f9daf5b6ffe36d16f1eb2c45711a301521a7"},
-    {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c7cf14a27b0d6adfaebb3ae4153f1e516df54e47e42dcc073d7b3d76111a8d86"},
+    {file = "Pillow-10.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:44e7e4587392953e5e251190a964675f61e4dae88d1e6edbe9f36d6243547ff3"},
-    {file = "Pillow-10.0.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3400aae60685b06bb96f99a21e1ada7bc7a413d5f49bce739828ecd9391bb8f7"},
+    {file = "Pillow-10.0.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:3855447d98cced8670aaa63683808df905e956f00348732448b5a6df67ee5849"},
-    {file = "Pillow-10.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:dbc02381779d412145331789b40cc7b11fdf449e5d94f6bc0b080db0a56ea3f0"},
+    {file = "Pillow-10.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ed2d9c0704f2dc4fa980b99d565c0c9a543fe5101c25b3d60488b8ba80f0cce1"},
-    {file = "Pillow-10.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:9211e7ad69d7c9401cfc0e23d49b69ca65ddd898976d660a2fa5904e3d7a9baa"},
+    {file = "Pillow-10.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5bb289bb835f9fe1a1e9300d011eef4d69661bb9b34d5e196e5e82c4cb09b37"},
-    {file = "Pillow-10.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:faaf07ea35355b01a35cb442dd950d8f1bb5b040a7787791a535de13db15ed90"},
+    {file = "Pillow-10.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a0d3e54ab1df9df51b914b2233cf779a5a10dfd1ce339d0421748232cea9876"},
-    {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9f72a021fbb792ce98306ffb0c348b3c9cb967dce0f12a49aa4c3d3fdefa967"},
+    {file = "Pillow-10.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:2cc6b86ece42a11f16f55fe8903595eff2b25e0358dec635d0a701ac9586588f"},
-    {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d"},
+    {file = "Pillow-10.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ca26ba5767888c84bf5a0c1a32f069e8204ce8c21d00a49c90dabeba00ce0145"},
-    {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:76edb0a1fa2b4745fb0c99fb9fb98f8b180a1bbceb8be49b087e0b21867e77d3"},
+    {file = "Pillow-10.0.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f0b4b06da13275bc02adfeb82643c4a6385bd08d26f03068c2796f60d125f6f2"},
-    {file = "Pillow-10.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:368ab3dfb5f49e312231b6f27b8820c823652b7cd29cfbd34090565a015e99ba"},
+    {file = "Pillow-10.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bc2e3069569ea9dbe88d6b8ea38f439a6aad8f6e7a6283a38edf61ddefb3a9bf"},
-    {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:608bfdee0d57cf297d32bcbb3c728dc1da0907519d1784962c5f0c68bb93e5a3"},
+    {file = "Pillow-10.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8b451d6ead6e3500b6ce5c7916a43d8d8d25ad74b9102a629baccc0808c54971"},
-    {file = "Pillow-10.0.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5c6e3df6bdd396749bafd45314871b3d0af81ff935b2d188385e970052091017"},
+    {file = "Pillow-10.0.1-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:32bec7423cdf25c9038fef614a853c9d25c07590e1a870ed471f47fb80b244db"},
-    {file = "Pillow-10.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:7be600823e4c8631b74e4a0d38384c73f680e6105a7d3c6824fcf226c178c7e6"},
+    {file = "Pillow-10.0.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7cf63d2c6928b51d35dfdbda6f2c1fddbe51a6bc4a9d4ee6ea0e11670dd981e"},
-    {file = "Pillow-10.0.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:92be919bbc9f7d09f7ae343c38f5bb21c973d2576c1d45600fce4b74bafa7ac0"},
+    {file = "Pillow-10.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f6d3d4c905e26354e8f9d82548475c46d8e0889538cb0657aa9c6f0872a37aa4"},
-    {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8182b523b2289f7c415f589118228d30ac8c355baa2f3194ced084dac2dbba"},
+    {file = "Pillow-10.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:847e8d1017c741c735d3cd1883fa7b03ded4f825a6e5fcb9378fd813edee995f"},
-    {file = "Pillow-10.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:38250a349b6b390ee6047a62c086d3817ac69022c127f8a5dc058c31ccef17f3"},
+    {file = "Pillow-10.0.1-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:7f771e7219ff04b79e231d099c0a28ed83aa82af91fd5fa9fdb28f5b8d5addaf"},
-    {file = "Pillow-10.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:88af2003543cc40c80f6fca01411892ec52b11021b3dc22ec3bc9d5afd1c5334"},
+    {file = "Pillow-10.0.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:459307cacdd4138edee3875bbe22a2492519e060660eaf378ba3b405d1c66317"},
-    {file = "Pillow-10.0.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:c189af0545965fa8d3b9613cfdb0cd37f9d71349e0f7750e1fd704648d475ed2"},
+    {file = "Pillow-10.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:b059ac2c4c7a97daafa7dc850b43b2d3667def858a4f112d1aa082e5c3d6cf7d"},
-    {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce7b031a6fc11365970e6a5686d7ba8c63e4c1cf1ea143811acbb524295eabed"},
+    {file = "Pillow-10.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d6caf3cd38449ec3cd8a68b375e0c6fe4b6fd04edb6c9766b55ef84a6e8ddf2d"},
-    {file = "Pillow-10.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:db24668940f82321e746773a4bc617bfac06ec831e5c88b643f91f122a785684"},
+    {file = "Pillow-10.0.1.tar.gz", hash = "sha256:d72967b06be9300fed5cfbc8b5bafceec48bf7cdc7dab66b1d2549035287191d"},
-    {file = "Pillow-10.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:efe8c0681042536e0d06c11f48cebe759707c9e9abf880ee213541c5b46c5bf3"},
-    {file = "Pillow-10.0.0.tar.gz", hash = "sha256:9c82b5b3e043c7af0d95792d0d20ccf68f61a1fec6b3530e718b688422727396"},
 ]

 [package.extras]
@@ -1428,24 +1442,24 @@ testing = ["pytest", "pytest-benchmark"]

 [[package]]
 name = "protobuf"
-version = "4.24.2"
+version = "4.24.3"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "protobuf-4.24.2-cp310-abi3-win32.whl", hash = "sha256:58e12d2c1aa428ece2281cef09bbaa6938b083bcda606db3da4e02e991a0d924"},
+    {file = "protobuf-4.24.3-cp310-abi3-win32.whl", hash = "sha256:20651f11b6adc70c0f29efbe8f4a94a74caf61b6200472a9aea6e19898f9fcf4"},
-    {file = "protobuf-4.24.2-cp310-abi3-win_amd64.whl", hash = "sha256:77700b55ba41144fc64828e02afb41901b42497b8217b558e4a001f18a85f2e3"},
+    {file = "protobuf-4.24.3-cp310-abi3-win_amd64.whl", hash = "sha256:3d42e9e4796a811478c783ef63dc85b5a104b44aaaca85d4864d5b886e4b05e3"},
-    {file = "protobuf-4.24.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:237b9a50bd3b7307d0d834c1b0eb1a6cd47d3f4c2da840802cd03ea288ae8880"},
+    {file = "protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:6e514e8af0045be2b56e56ae1bb14f43ce7ffa0f68b1c793670ccbe2c4fc7d2b"},
-    {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:25ae91d21e3ce8d874211110c2f7edd6384816fb44e06b2867afe35139e1fd1c"},
+    {file = "protobuf-4.24.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:ba53c2f04798a326774f0e53b9c759eaef4f6a568ea7072ec6629851c8435959"},
-    {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:c00c3c7eb9ad3833806e21e86dca448f46035242a680f81c3fe068ff65e79c74"},
+    {file = "protobuf-4.24.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:f6ccbcf027761a2978c1406070c3788f6de4a4b2cc20800cc03d52df716ad675"},
-    {file = "protobuf-4.24.2-cp37-cp37m-win32.whl", hash = "sha256:4e69965e7e54de4db989289a9b971a099e626f6167a9351e9d112221fc691bc1"},
+    {file = "protobuf-4.24.3-cp37-cp37m-win32.whl", hash = "sha256:1b182c7181a2891e8f7f3a1b5242e4ec54d1f42582485a896e4de81aa17540c2"},
-    {file = "protobuf-4.24.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c5cdd486af081bf752225b26809d2d0a85e575b80a84cde5172a05bbb1990099"},
+    {file = "protobuf-4.24.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b0271a701e6782880d65a308ba42bc43874dabd1a0a0f41f72d2dac3b57f8e76"},
-    {file = "protobuf-4.24.2-cp38-cp38-win32.whl", hash = "sha256:6bd26c1fa9038b26c5c044ee77e0ecb18463e957fefbaeb81a3feb419313a54e"},
+    {file = "protobuf-4.24.3-cp38-cp38-win32.whl", hash = "sha256:e29d79c913f17a60cf17c626f1041e5288e9885c8579832580209de8b75f2a52"},
-    {file = "protobuf-4.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb7aa97c252279da65584af0456f802bd4b2de429eb945bbc9b3d61a42a8cd16"},
+    {file = "protobuf-4.24.3-cp38-cp38-win_amd64.whl", hash = "sha256:067f750169bc644da2e1ef18c785e85071b7c296f14ac53e0900e605da588719"},
-    {file = "protobuf-4.24.2-cp39-cp39-win32.whl", hash = "sha256:2b23bd6e06445699b12f525f3e92a916f2dcf45ffba441026357dea7fa46f42b"},
+    {file = "protobuf-4.24.3-cp39-cp39-win32.whl", hash = "sha256:2da777d34b4f4f7613cdf85c70eb9a90b1fbef9d36ae4a0ccfe014b0b07906f1"},
-    {file = "protobuf-4.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:839952e759fc40b5d46be319a265cf94920174d88de31657d5622b5d8d6be5cd"},
+    {file = "protobuf-4.24.3-cp39-cp39-win_amd64.whl", hash = "sha256:f631bb982c5478e0c1c70eab383af74a84be66945ebf5dd6b06fc90079668d0b"},
-    {file = "protobuf-4.24.2-py3-none-any.whl", hash = "sha256:3b7b170d3491ceed33f723bbf2d5a260f8a4e23843799a3906f16ef736ef251e"},
+    {file = "protobuf-4.24.3-py3-none-any.whl", hash = "sha256:f6f8dc65625dadaad0c8545319c2e2f0424fede988368893ca3844261342c11a"},
-    {file = "protobuf-4.24.2.tar.gz", hash = "sha256:7fda70797ddec31ddfa3576cbdcc3ddbb6b3078b737a1a87ab9136af0570cd6e"},
+    {file = "protobuf-4.24.3.tar.gz", hash = "sha256:12e9ad2ec079b833176d2921be2cb24281fa591f0b119b208b788adc48c2561d"},
 ]

 [[package]]
@@ -1517,13 +1531,13 @@ numpy = ">=1.16.6"

 [[package]]
 name = "pytest"
-version = "7.4.0"
+version = "7.4.2"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-7.4.0-py3-none-any.whl", hash = "sha256:78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32"},
+    {file = "pytest-7.4.2-py3-none-any.whl", hash = "sha256:1d881c6124e08ff0a1bb75ba3ec0bfd8b5354a01c194ddd5a0a870a48d99b002"},
-    {file = "pytest-7.4.0.tar.gz", hash = "sha256:b4bf8c45bd59934ed84001ad51e11b4ee40d40a1229d2c79f9c592b0a3f6bd8a"},
+    {file = "pytest-7.4.2.tar.gz", hash = "sha256:a766259cfab564a2ad52cb1aae1b881a75c3eb7e34ca3779697c23ed47c47069"},
 ]

 [package.dependencies]
@@ -1553,13 +1567,13 @@ six = ">=1.5"

 [[package]]
 name = "pytz"
-version = "2023.3"
+version = "2023.3.post1"
 description = "World timezone definitions, modern and historical"
 optional = true
 python-versions = "*"
 files = [
-    {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"},
+    {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
-    {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"},
+    {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
 ]

 [[package]]
@@ -1916,19 +1930,19 @@ files = [

 [[package]]
 name = "setuptools"
-version = "68.1.2"
+version = "68.2.2"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"},
+    {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"},
-    {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"},
+    {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"},
 ]

 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
 testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]

 [[package]]
 name = "six"
@@ -2092,13 +2106,13 @@ telegram = ["requests"]

 [[package]]
 name = "transformers"
-version = "4.32.1"
+version = "4.33.2"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "transformers-4.32.1-py3-none-any.whl", hash = "sha256:b930d3dbd907a3f300cf49e54d63a56f8a0ab16b01a2c2a61ecff37c6de1da08"},
-    {file = "transformers-4.32.1.tar.gz", hash = "sha256:1edc8ae1de357d97c3d36b04412aa63d55e6fc0c4b39b419a7d380ed947d2252"},
+    {file = "transformers-4.33.2-py3-none-any.whl", hash = "sha256:5a9a757bea5b5a1b94796805bcb5978b552208a3ac193f46edda66be6f4a5488"},
+    {file = "transformers-4.33.2.tar.gz", hash = "sha256:47dd36f302afec86d9cdcacab61bbd0296e6bb02e64d2ed7855daaab14ee290e"},
 ]

 [package.dependencies]
@@ -2115,16 +2129,16 @@ tqdm = ">=4.27"

 [package.extras]
 accelerate = ["accelerate (>=0.20.3)"]
-agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.9,!=1.12.0)"]
-all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"]
+agents = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.10,!=1.12.0)"]
+all = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 codecarbon = ["codecarbon (==1.2.0)"]
 deepspeed = ["accelerate (>=0.20.3)", "deepspeed (>=0.9.3)"]
 deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "torchaudio", "torchvision"]
+dev = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "urllib3 (<2.0.0)"]
+dev-torch = ["GitPython (<3.1.19)", "Pillow (<10.0.0)", "accelerate (>=0.20.3)", "beautifulsoup4", "black (>=23.1,<24.0)", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune]", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (>=0.0.241,<=0.0.259)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "timeout-decorator", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+docs = ["Pillow (<10.0.0)", "accelerate (>=0.20.3)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune]", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx", "timm", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "torchaudio", "torchvision"]
 docs-specific = ["hf-doc-builder"]
 fairscale = ["fairscale (>0.3)"]
 flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"]
@@ -2147,15 +2161,15 @@ sigopt = ["sigopt"]
 sklearn = ["scikit-learn"]
 speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 testing = ["GitPython (<3.1.19)", "beautifulsoup4", "black (>=23.1,<24.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "timeout-decorator"]
-tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx"]
-tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.14)", "tensorflow-text (<2.14)", "tf2onnx"]
+tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"]
+tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.15)", "tensorflow-text (<2.15)", "tf2onnx"]
 tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 timm = ["timm"]
 tokenizers = ["tokenizers (>=0.11.1,!=0.11.3,<0.14)"]
-torch = ["accelerate (>=0.20.3)", "torch (>=1.9,!=1.12.0)"]
+torch = ["accelerate (>=0.20.3)", "torch (>=1.10,!=1.12.0)"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 torch-vision = ["Pillow (<10.0.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.9,!=1.12.0)", "tqdm (>=4.27)"]
+torchhub = ["filelock", "huggingface-hub (>=0.15.1,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.11.1,!=0.11.3,<0.14)", "torch (>=1.10,!=1.12.0)", "tqdm (>=4.27)"]
 video = ["av (==9.2.0)", "decord (==0.6.0)"]
 vision = ["Pillow (<10.0.0)"]

@@ -2181,13 +2195,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.

 [[package]]
 name = "typing-extensions"
-version = "4.7.1"
-description = "Backported and Experimental Type Hints for Python 3.7+"
+version = "4.8.0"
+description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"},
-    {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"},
+    {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
+    {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
 ]

 [[package]]
@@ -2203,13 +2217,13 @@ files = [

 [[package]]
 name = "urllib3"
-version = "2.0.4"
+version = "2.0.5"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "urllib3-2.0.4-py3-none-any.whl", hash = "sha256:de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4"},
-    {file = "urllib3-2.0.4.tar.gz", hash = "sha256:8d22f86aae8ef5e410d4f539fde9ce6b2113a001bb4d189e0aed70642d602b11"},
+    {file = "urllib3-2.0.5-py3-none-any.whl", hash = "sha256:ef16afa8ba34a1f989db38e1dbbe0c302e4289a47856990d0682e374563ce35e"},
+    {file = "urllib3-2.0.5.tar.gz", hash = "sha256:13abf37382ea2ce6fb744d4dad67838eec857c9f4f57009891805e0b5e123594"},
 ]

 [package.extras]
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "1.0.3"
+version = "1.1.0"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]

@@ -54,5 +54,7 @@ priority = "explicit"
 markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]

 [build-system]
-requires = ["poetry-core>=1.0.0"]
+requires = [
+    "poetry-core>=1.0.0",
+]
 build-backend = "poetry.core.masonry.api"
@@ -9,19 +9,19 @@ certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.13"
 charset-normalizer==3.2.0 ; python_version >= "3.9" and python_version < "3.13"
 click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-datasets==2.14.4 ; python_version >= "3.9" and python_version < "3.13"
+datasets==2.14.5 ; python_version >= "3.9" and python_version < "3.13"
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.12.3 ; python_version >= "3.9" and python_version < "3.13"
+filelock==3.12.4 ; python_version >= "3.9" and python_version < "3.13"
 frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
 fsspec==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
 fsspec[http]==2023.6.0 ; python_version >= "3.9" and python_version < "3.13"
 googleapis-common-protos==1.60.0 ; python_version >= "3.9" and python_version < "3.13"
 grpc-interceptor==0.15.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.57.0 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-reflection==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-status==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
+grpcio==1.58.0 ; python_version >= "3.9" and python_version < "3.13"
 hf-transfer==0.1.3 ; python_version >= "3.9" and python_version < "3.13"
 huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
 idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
@@ -32,7 +32,7 @@ mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
 multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
 multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13"
 networkx==3.1 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.25.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -43,32 +43,32 @@ opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
 packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
-pandas==2.0.3 ; python_version >= "3.9" and python_version < "3.13"
+pandas==2.1.1 ; python_version >= "3.9" and python_version < "3.13"
 peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.0.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.24.2 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.0.1 ; python_version >= "3.9" and python_version < "3.13"
+protobuf==4.24.3 ; python_version >= "3.9" and python_version < "3.13"
 psutil==5.9.5 ; python_version >= "3.9" and python_version < "3.13"
 pyarrow==13.0.0 ; python_version >= "3.9" and python_version < "3.13"
 python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13"
-pytz==2023.3 ; python_version >= "3.9" and python_version < "3.13"
+pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 regex==2023.8.8 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
 safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
 scipy==1.11.2 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==68.1.2 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==68.2.2 ; python_version >= "3.9" and python_version < "3.13"
 six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
 sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
 texttable==1.6.7 ; python_version >= "3.9" and python_version < "3.13"
 tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
 torch==2.0.1 ; python_version >= "3.9" and python_version < "3.13"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.32.1 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.33.2 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
+typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
 tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.0.4 ; python_version >= "3.9" and python_version < "3.13"
+urllib3==2.0.5 ; python_version >= "3.9" and python_version < "3.13"
 win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 wrapt==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 xxhash==3.3.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -45,12 +45,15 @@ def test_stopping_criteria_max():
     assert criteria(1, "") == (False, None)
     assert criteria(1, "") == (True, FinishReason.FINISH_REASON_LENGTH)


 def test_batch_top_tokens():
     top_n_tokens = [0, 2, 3, 4, 5]
     top_n_tokens_tensor = torch.tensor(top_n_tokens)
-    inp_logprobs = torch.tensor([[-1., -3., -4., -2., -3.]] * 5)
+    inp_logprobs = torch.tensor([[-1.0, -3.0, -4.0, -2.0, -3.0]] * 5)

-    topn_tok_ids, topn_tok_logprobs = batch_top_tokens(top_n_tokens, top_n_tokens_tensor, inp_logprobs)
+    topn_tok_ids, topn_tok_logprobs = batch_top_tokens(
+        top_n_tokens, top_n_tokens_tensor, inp_logprobs
+    )

     assert topn_tok_ids[0] == []
     assert topn_tok_ids[1] == [0, 3]
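The updated test pins down the contract of batch_top_tokens: a request with top_n_tokens == 0 gets an empty list, and for n == 2 over logprobs [-1.0, -3.0, -4.0, -2.0, -3.0] the two best token ids are 0 and 3. A minimal per-row sketch of that contract (not the batched implementation under test; batch_top_tokens_sketch is a hypothetical name):

    import torch

    def batch_top_tokens_sketch(top_n_tokens, logprobs):
        # n == 0 means top tokens were not requested for that row.
        ids, lps = [], []
        for n, row in zip(top_n_tokens, logprobs):
            if n == 0:
                ids.append([])
                lps.append([])
                continue
            values, indices = row.topk(n)
            ids.append(indices.tolist())
            lps.append(values.tolist())
        return ids, lps

    row = torch.tensor([-1.0, -3.0, -4.0, -2.0, -3.0])
    print(batch_top_tokens_sketch([2], row.unsqueeze(0))[0])  # [[0, 3]]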
@@ -17,6 +17,8 @@ class Quantization(str, Enum):
     bitsandbytes_nf4 = "bitsandbytes-nf4"
     bitsandbytes_fp4 = "bitsandbytes-fp4"
     gptq = "gptq"
+    awq = "awq"
+    eetq = "eetq"


 class Dtype(str, Enum):
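Because Quantization subclasses str, the CLI layer can parse the new values straight from the command line, so "awq" and "eetq" become valid --quantize choices without further plumbing. A minimal standalone sketch of that str-Enum round trip (mirroring, not importing, the enum above):

    from enum import Enum

    class Quantization(str, Enum):
        gptq = "gptq"
        awq = "awq"
        eetq = "eetq"

    # CLI layers can compare and parse these as plain strings:
    assert Quantization("awq") == "awq"
    assert Quantization.eetq.value == "eetq"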
@@ -123,8 +125,12 @@ def download_weights(

     if not is_local_model:
         try:
-            adapter_config_filename = hf_hub_download(model_id, revision=revision, filename="adapter_config.json")
-            utils.download_and_unload_peft(model_id, revision, trust_remote_code=trust_remote_code)
+            adapter_config_filename = hf_hub_download(
+                model_id, revision=revision, filename="adapter_config.json"
+            )
+            utils.download_and_unload_peft(
+                model_id, revision, trust_remote_code=trust_remote_code
+            )
             is_local_model = True
             utils.weight_files(model_id, revision, extension)
             return
@@ -177,8 +183,12 @@ def download_weights(
             import transformers
             import json

-            config_filename = hf_hub_download(model_id, revision=revision, filename="config.json")
+            if is_local_model:
+                config_filename = os.path.join(model_id, "config.json")
+            else:
+                config_filename = hf_hub_download(
+                    model_id, revision=revision, filename="config.json"
+                )
             with open(config_filename, "r") as f:
                 config = json.load(f)
             architecture = config["architectures"][0]
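The change above makes config resolution depend on where the weights live: a local checkout reads config.json from disk, while Hub models still go through hf_hub_download. A condensed sketch of that branch (resolve_config is a hypothetical helper name; the calls match the diff):

    import os
    from huggingface_hub import hf_hub_download

    def resolve_config(model_id, revision, is_local_model):
        if is_local_model:
            # Local checkout: config.json sits next to the weights.
            return os.path.join(model_id, "config.json")
        # Hub model: download (or reuse a cached copy of) config.json.
        return hf_hub_download(model_id, revision=revision, filename="config.json")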
@@ -187,7 +197,6 @@ def download_weights(

             # Name for this varible depends on transformers version.
             discard_names = getattr(class_, "_tied_weights_keys", [])
-            discard_names.extend(getattr(class_, "_keys_to_ignore_on_load_missing", []))

         except Exception as e:
             discard_names = []
@@ -67,6 +67,16 @@ if FLASH_ATTENTION:
     __all__.append(FlashLlama)
     __all__.append(IDEFICSSharded)

+MISTRAL = True
+try:
+    from text_generation_server.models.flash_mistral import FlashMistral
+except ImportError as e:
+    logger.warning(f"Could not import Mistral model: {e}")
+    MISTRAL = False
+
+if MISTRAL:
+    __all__.append(FlashMistral)
+

 def get_model(
     model_id: str,
@@ -153,7 +163,11 @@ def get_model(
         )
     elif model_type == "mpt":
         return MPTSharded(
-            model_id, revision, quantize=quantize, trust_remote_code=trust_remote_code
+            model_id,
+            revision,
+            quantize=quantize,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
         )

     elif model_type == "gpt_neox":
@@ -182,7 +196,7 @@ def get_model(
             trust_remote_code=trust_remote_code,
         )

-    elif model_type == "llama":
+    elif model_type == "llama" or model_type == "baichuan":
         if FLASH_ATTENTION:
             return FlashLlama(
                 model_id,
@@ -233,7 +247,18 @@ def get_model(
             trust_remote_code=trust_remote_code,
         )

-    elif model_type == "opt":
+    if model_type == "mistral":
+        if MISTRAL:
+            return FlashMistral(
+                model_id,
+                revision,
+                quantize=quantize,
+                dtype=dtype,
+                trust_remote_code=trust_remote_code,
+            )
+        raise NotImplementedError("Mistral model requires flash attention v2")
+
+    if model_type == "opt":
         return OPTSharded(
             model_id,
             revision,
@@ -242,7 +267,7 @@ def get_model(
             trust_remote_code=trust_remote_code,
         )

-    elif model_type == "t5":
+    if model_type == "t5":
         return T5Sharded(
             model_id,
             revision,
@@ -250,7 +275,7 @@ def get_model(
             dtype=dtype,
             trust_remote_code=trust_remote_code,
         )
-    elif model_type == "idefics":
+    if model_type == "idefics":
         if FLASH_ATTENTION:
             return IDEFICSSharded(
                 model_id,
@@ -268,10 +293,10 @@ def get_model(
         raise ValueError(
             "gptq quantization is not supported for AutoModel, you can try to quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
         )
+    if quantize == "awq":
+        raise ValueError("awq quantization is not supported for AutoModel")
     elif (quantize == "bitsandbytes-fp4") or (quantize == "bitsandbytes-nf4"):
-        raise ValueError(
-            "4bit quantization is not supported for AutoModel"
-        )
+        raise ValueError("4bit quantization is not supported for AutoModel")
     if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
         return CausalLM(
             model_id,
@@ -51,7 +51,7 @@ class BLOOMSharded(CausalLM):
             dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype

         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
server/text_generation_server/models/cache_manager.py (new file, 135 lines)
@@ -0,0 +1,135 @@
+import math
+import torch
+
+from typing import Optional, List, Tuple
+
+BLOCK_SIZE: int = 16
+# Will be set in warmup
+CACHE_MANAGER: Optional["CacheManager"] = None
+
+
+class CacheManager:
+    def __init__(
+        self,
+        num_blocks: int,
+        num_layers: int,
+        num_heads: int,
+        head_size: int,
+        repeat_slots: bool,
+        dtype: torch.dtype,
+        device: torch.device,
+    ):
+        self.block_size = BLOCK_SIZE
+        self.num_blocks = num_blocks
+        self.repeat_slots = repeat_slots
+
+        element_size = torch.tensor([], dtype=dtype).element_size()
+        x = self.block_size // element_size
+
+        self.kv_cache = [
+            (
+                torch.empty(
+                    (num_blocks, num_heads, head_size // x, self.block_size, x),
+                    dtype=dtype,
+                    device=device,
+                ),
+                torch.empty(
+                    (num_blocks, num_heads, head_size, self.block_size),
+                    dtype=dtype,
+                    device=device,
+                ),
+            )
+            for _ in range(num_layers)
+        ]
+        self.free_block_mask = torch.ones(num_blocks, dtype=torch.int32, device="cpu")
+        self.slots = torch.arange(
+            0, num_blocks * self.block_size, dtype=torch.int32
+        ).view(num_blocks, self.block_size)
+
+    def allocate(
+        self,
+        needed_blocks_slots: List[Tuple[int, int]],
+        blocks: int,
+        max_blocks: int,
+        device: torch.device,
+    ):
+        # Get free blocks indices by finding values in mask that are not set to 0
+        free_block_indices = self.free_block_mask.nonzero()
+        assert (
+            len(free_block_indices) >= blocks
+        ), f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks"
+
+        # Slice by the number of required blocks
+        block_indices = free_block_indices[:blocks]
+        block_indices = block_indices.flatten()
+
+        # Padded block tables
+        block_tables_tensor = torch.zeros(
+            (len(needed_blocks_slots), max_blocks), dtype=torch.int32
+        )
+
+        # Allocate paged attention blocks
+        cumulative_blocks = 0
+        slots = []
+        block_tables = []
+        for i, (needed_blocks, needed_slots) in enumerate(needed_blocks_slots):
+            # Get allocated blocks for this sequence
+            allocated_blocks = block_indices[
+                cumulative_blocks : cumulative_blocks + needed_blocks
+            ]
+            # Get slots for the allocated blocks
+            all_slots = self.slots[allocated_blocks].flatten()
+
+            # Repeat slots in the case of context sliding window
+            if needed_slots > len(all_slots) and self.repeat_slots:
+                repeats = math.ceil(needed_slots / len(all_slots))
+                all_slots = all_slots.repeat(repeats)
+
+            allocated_slots = all_slots[:needed_slots]
+
+            slots.append(allocated_slots)
+            block_tables.append(allocated_blocks.tolist())
+            block_tables_tensor[i, :needed_blocks] = allocated_blocks
+            cumulative_blocks += needed_blocks
+
+        block_tables = block_tables
+        block_tables_tensor = block_tables_tensor.to(device)
+        slots = torch.concat(slots).to(device)
+
+        # Allocate the required number of blocks by setting the mask to 0
+        self.free_block_mask[block_indices] = 0
+
+        return block_tables, block_tables_tensor, slots
+
+    def free(self, block_indices: Optional[List[int]]):
+        if block_indices is not None and block_indices:
+            # Reset mask
+            self.free_block_mask[block_indices] = 1
+
+
+def set_cache_manager(
+    num_blocks: int,
+    num_layers: int,
+    num_heads: int,
+    head_size: int,
+    repeat_slots: bool,
+    dtype: torch.dtype,
+    device: torch.device,
+) -> CacheManager:
+    global CACHE_MANAGER
+    if CACHE_MANAGER is not None:
+        del CACHE_MANAGER
+        torch.cuda.empty_cache()
+
+    CACHE_MANAGER = CacheManager(
+        num_blocks, num_layers, num_heads, head_size, repeat_slots, dtype, device
+    )
+    return CACHE_MANAGER
+
+
+def get_cache_manager() -> CacheManager:
+    global CACHE_MANAGER
+    if CACHE_MANAGER is None:
+        raise RuntimeError("cache manager was not initialized")
+
+    return CACHE_MANAGER
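The new CacheManager is a block allocator for the paged KV cache: free_block_mask tracks which of the num_blocks fixed-size blocks (BLOCK_SIZE = 16 slots each) are available, allocate hands out the first free ones per sequence, and free returns them. A toy walk-through, assuming a CPU device purely for illustration:

    import torch
    from text_generation_server.models.cache_manager import set_cache_manager

    # 8 blocks x 16 slots, one layer, one head of size 16.
    manager = set_cache_manager(
        num_blocks=8, num_layers=1, num_heads=1, head_size=16,
        repeat_slots=False, dtype=torch.float16, device=torch.device("cpu"),
    )
    # Sequence A needs 2 blocks / 20 slots, sequence B needs 1 block / 5 slots.
    block_tables, block_tables_tensor, slots = manager.allocate(
        needed_blocks_slots=[(2, 20), (1, 5)],
        blocks=3, max_blocks=2, device=torch.device("cpu"),
    )
    print(block_tables)   # [[0, 1], [2]] -- lowest-numbered free blocks first
    print(len(slots))     # 25 flat slot indices across both sequences
    manager.free([0, 1, 2])  # mark the blocks reusable again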
@@ -492,7 +492,7 @@ class CausalLM(Model):
                 raise ValueError("quantization is not available on CPU")

             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype

         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -579,7 +579,7 @@ class CausalLM(Model):
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
             batch.top_n_tokens,
             batch.top_n_tokens_tensor,
-            torch.softmax(logits[:, -1], -1),
+            torch.log_softmax(logits[:, -1], -1),
         )

         # Zipped iterator
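Switching to torch.log_softmax means batch_top_tokens now receives log-probabilities directly, matching the updated test earlier in this diff, and avoids the numerically lossy log-of-softmax composition. The two agree in exact arithmetic:

    import torch

    logits = torch.tensor([[2.0, 1.0, 0.5]])
    a = torch.log_softmax(logits, -1)         # single fused, stable op
    b = torch.log(torch.softmax(logits, -1))  # same value, two lossy steps
    assert torch.allclose(a, b)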
@@ -641,8 +641,14 @@ class CausalLM(Model):
             if i % self.world_size == self.rank:
                 if stop:
                     # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :, 0]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids[:, 0],
+                        prefix_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens
+                        - 1,
+                        read_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens,
+                        skip_special_tokens=True,
                    )
                     # Get seed
                     if isinstance(next_token_chooser.choice, Sampling):
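decode_token decodes with a small prefix window instead of decoding only the generated ids, so byte-level and multi-byte tokens at the boundary render correctly; the two offsets mark where the already-seen text ends. A sketch of the idea, not the exact implementation (decode_suffix is a hypothetical name):

    # Decode a slightly longer window, then strip the prefix text, so a
    # token split across the boundary is never decoded in isolation.
    def decode_suffix(tokenizer, all_ids, prefix_offset, read_offset):
        prefix_text = tokenizer.decode(all_ids[prefix_offset:read_offset])
        new_text = tokenizer.decode(all_ids[prefix_offset:])
        return new_text[len(prefix_text):]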
@@ -40,7 +40,10 @@ from text_generation_server.utils.layers import (
 )

 CUSTOM_KERNELS_ENABLED = False
-if not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True":
+if (
+    torch.cuda.is_available()
+    and not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True"
+):
     try:
         from custom_kernels import fused_bloom_attention_cuda

@@ -149,6 +149,27 @@ class LlamaRMSNorm(nn.Module):
             return normed_hidden_states, res


+def load_attention(config, prefix, weights):
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+    else:
+        if config.model_type == "baichuan":
+            return TensorParallelColumnLinear.load_qkv(
+                config,
+                prefix=f"{prefix}.W_pack",
+                weights=weights,
+                bias=False,
+            )
+        else:
+            return TensorParallelColumnLinear.load_multi(
+                config,
+                prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+                dim=0,
+                weights=weights,
+                bias=False,
+            )
+
+
 def _load_gqa(config, prefix: str, weights):
     assert config.hidden_size % config.num_attention_heads == 0
     assert config.num_attention_heads % weights.process_group.size() == 0
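The new load_attention indirection exists because Baichuan checkpoints store the three attention projections fused as a single W_pack tensor, while Llama keeps q_proj/k_proj/v_proj separate. A toy illustration of that layout assumption (not the sharded loader itself):

    import torch

    hidden = 8
    q = torch.randn(hidden, hidden)
    k = torch.randn(hidden, hidden)
    v = torch.randn(hidden, hidden)
    w_pack = torch.cat([q, k, v], dim=0)  # what load_qkv must split and shard
    assert w_pack.shape == (3 * hidden, hidden)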
@@ -159,7 +180,7 @@ def _load_gqa(config, prefix: str, weights):
         dim=0,
     )

-    if config.quantize != "gptq":
+    if config.quantize not in ["gptq", "awq"]:
         weight = weight.to(dtype=weights.dtype).to(device=weights.device)

         head_size = config.hidden_size // config.num_attention_heads
@@ -191,7 +212,10 @@ class FlashLlamaAttention(torch.nn.Module):
         #     config=config, prefix=f"{prefix}.rotary_emb", weights=weights
         # )
         self.rotary_emb = PositionRotaryEmbedding.static(
-            config=config, dim=self.head_size, base=config.rope_theta, device=weights.device
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
         )

         self.softmax_scale = self.head_size**-0.5
@@ -205,16 +229,9 @@ class FlashLlamaAttention(torch.nn.Module):
         self.num_key_value_heads = (
             config.num_key_value_heads // weights.process_group.size()
         )
-        if config.num_attention_heads != config.num_key_value_heads:
-            self.query_key_value = _load_gqa(config, prefix, weights)
-        else:
-            self.query_key_value = TensorParallelColumnLinear.load_multi(
-                config,
-                prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
-                dim=0,
-                weights=weights,
-                bias=False,
-            )
+        self.query_key_value = load_attention(config, prefix, weights)
         self.o_proj = TensorParallelRowLinear.load(
             config,
             prefix=f"{prefix}.o_proj",
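With grouped-query attention, several query heads share one KV head; the paged-attention kernels consume this as a per-query-head mapping (the kv_head_mapping built in these attention modules). A toy construction, e.g. 8 query heads over 2 KV heads:

    import torch

    num_heads, num_kv_heads = 8, 2
    num_groups = num_heads // num_kv_heads
    kv_head_mapping = torch.arange(num_kv_heads).repeat_interleave(num_groups)
    print(kv_head_mapping)  # tensor([0, 0, 0, 0, 1, 1, 1, 1])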
@ -0,0 +1,532 @@
|
|||||||
|
# coding=utf-8
|
||||||
|
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
|
||||||
|
#
|
||||||
|
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
|
||||||
|
# and OPT implementations in this library. It has been modified from its
|
||||||
|
# original forms to accommodate minor architectural differences compared
|
||||||
|
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torch.distributed
|
||||||
|
|
||||||
|
from torch import nn
|
||||||
|
from transformers.activations import ACT2FN
|
||||||
|
from transformers.configuration_utils import PretrainedConfig
|
||||||
|
from typing import Optional, List, Tuple
|
||||||
|
|
||||||
|
# Flash attention imports
|
||||||
|
import dropout_layer_norm
|
||||||
|
|
||||||
|
# vllm imports
|
||||||
|
import vllm_cache_ops
|
||||||
|
import vllm_attention_ops
|
||||||
|
|
||||||
|
from text_generation_server.utils.flash_attn import attention, HAS_FLASH_ATTN_V2
|
||||||
|
from text_generation_server.utils.layers import (
|
||||||
|
TensorParallelRowLinear,
|
||||||
|
TensorParallelColumnLinear,
|
||||||
|
TensorParallelEmbedding,
|
||||||
|
PositionRotaryEmbedding,
|
||||||
|
TensorParallelHead,
|
||||||
|
get_linear,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not HAS_FLASH_ATTN_V2:
|
||||||
|
raise ImportError("Mistral model requires flash attn v2")
|
||||||
|
|
||||||
|
|
||||||
|
class MistralConfig(PretrainedConfig):
|
||||||
|
model_type = "mistral"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
vocab_size=32000,
|
||||||
|
hidden_size=4096,
|
||||||
|
intermediate_size=14336,
|
||||||
|
num_hidden_layers=32,
|
||||||
|
num_attention_heads=32,
|
||||||
|
num_key_value_heads=8,
|
||||||
|
hidden_act="silu",
|
||||||
|
max_position_embeddings=4096 * 32,
|
||||||
|
initializer_range=0.02,
|
||||||
|
rms_norm_eps=1e-6,
|
||||||
|
use_cache=True,
|
||||||
|
pad_token_id=None,
|
||||||
|
bos_token_id=1,
|
||||||
|
eos_token_id=2,
|
||||||
|
pretraining_tp=1,
|
||||||
|
tie_word_embeddings=False,
|
||||||
|
rope_theta=10000.0,
|
||||||
|
sliding_window=4096,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
self.vocab_size = vocab_size
|
||||||
|
self.max_position_embeddings = max_position_embeddings
|
||||||
|
self.hidden_size = hidden_size
|
||||||
|
self.intermediate_size = intermediate_size
|
||||||
|
self.num_hidden_layers = num_hidden_layers
|
||||||
|
self.num_attention_heads = num_attention_heads
|
||||||
|
self.sliding_window = sliding_window
|
||||||
|
|
||||||
|
# for backward compatibility
|
||||||
|
if num_key_value_heads is None:
|
||||||
|
num_key_value_heads = num_attention_heads
|
||||||
|
|
||||||
|
self.num_key_value_heads = num_key_value_heads
|
||||||
|
self.hidden_act = hidden_act
|
||||||
|
self.initializer_range = initializer_range
|
||||||
|
self.rms_norm_eps = rms_norm_eps
|
||||||
|
self.pretraining_tp = pretraining_tp
|
||||||
|
self.use_cache = use_cache
|
||||||
|
self.rope_theta = rope_theta
|
||||||
|
|
||||||
|
super().__init__(
|
||||||
|
pad_token_id=pad_token_id,
|
||||||
|
bos_token_id=bos_token_id,
|
||||||
|
eos_token_id=eos_token_id,
|
||||||
|
tie_word_embeddings=tie_word_embeddings,
|
||||||
|
**kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MistralRMSNorm(nn.Module):
|
||||||
|
def __init__(self, prefix, weights, eps=1e-6):
|
||||||
|
"""
|
||||||
|
LlamaRMSNorm is equivalent to T5LayerNorm
|
||||||
|
"""
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
weight = weights.get_tensor(f"{prefix}.weight")
|
||||||
|
self.weight = nn.Parameter(weight)
|
||||||
|
self.variance_epsilon = eps
|
||||||
|
|
||||||
|
def forward(self, hidden_states, residual=None):
|
||||||
|
if hidden_states.shape[-1] > 8192:
|
||||||
|
if residual is not None:
|
||||||
|
hidden_states += residual
|
||||||
|
residual = hidden_states
|
||||||
|
|
||||||
|
hidden_states = hidden_states.to(torch.float32)
|
||||||
|
variance = hidden_states.pow(2).mean(-1, keepdim=True)
|
||||||
|
hidden_states = hidden_states * torch.rsqrt(
|
||||||
|
variance + self.variance_epsilon
|
||||||
|
)
|
||||||
|
|
||||||
|
# convert into half-precision if necessary
|
||||||
|
if self.weight.dtype in [torch.float16, torch.bfloat16]:
|
||||||
|
hidden_states = hidden_states.to(self.weight.dtype)
|
||||||
|
|
||||||
|
return self.weight * hidden_states, residual
|
||||||
|
else:
|
||||||
|
# faster post attention rms norm
|
||||||
|
normed_hidden_states, res, *rest = dropout_layer_norm.dropout_add_ln_fwd(
|
||||||
|
hidden_states,
|
||||||
|
residual,
|
||||||
|
self.weight,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
0.0,
|
||||||
|
self.variance_epsilon,
|
||||||
|
1.0,
|
||||||
|
0,
|
||||||
|
None,
|
||||||
|
False,
|
||||||
|
True, # Activate RMSNorm
|
||||||
|
)
|
||||||
|
if res is None:
|
||||||
|
res = hidden_states
|
||||||
|
|
||||||
|
return normed_hidden_states, res
|
||||||
|
|
||||||
|
|
||||||
|
def load_attention(config, prefix, weights):
|
||||||
|
if config.num_attention_heads != config.num_key_value_heads:
|
||||||
|
return _load_gqa(config, prefix, weights)
|
||||||
|
else:
|
||||||
|
return TensorParallelColumnLinear.load_multi(
|
||||||
|
config,
|
||||||
|
prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
|
||||||
|
dim=0,
|
||||||
|
weights=weights,
|
||||||
|
bias=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_gqa(config, prefix: str, weights):
|
||||||
|
assert config.hidden_size % config.num_attention_heads == 0
|
||||||
|
assert config.num_attention_heads % weights.process_group.size() == 0
|
||||||
|
|
||||||
|
weight = weights.get_multi_weights_col(
|
||||||
|
prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
|
||||||
|
quantize=config.quantize,
|
||||||
|
dim=0,
|
||||||
|
)
|
||||||
|
|
||||||
|
if config.quantize not in ["gptq", "awq"]:
|
||||||
|
weight = weight.to(dtype=weights.dtype).to(device=weights.device)
|
||||||
|
|
||||||
|
head_size = config.hidden_size // config.num_attention_heads
|
||||||
|
num_heads = config.num_attention_heads // weights.process_group.size()
|
||||||
|
num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
|
||||||
|
assert list(weight.shape) == [
|
||||||
|
(num_heads + 2 * num_key_value_heads) * head_size,
|
||||||
|
config.hidden_size,
|
||||||
|
], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}"
|
||||||
|
|
||||||
|
return TensorParallelColumnLinear(
|
||||||
|
get_linear(weight, bias=None, quantize=config.quantize)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class MistralAttention(torch.nn.Module):
    def __init__(
        self,
        prefix: str,
        config,
        weights,
    ):
        super().__init__()
        self.max_past = (
            config.sliding_window if config.sliding_window is not None else 0
        )
        self.num_heads = config.num_attention_heads
        self.hidden_size = config.hidden_size
        self.head_size = self.hidden_size // self.num_heads

        self.rotary_emb = PositionRotaryEmbedding.static(
            config=config,
            dim=self.head_size,
            base=config.rope_theta,
            device=weights.device,
        )

        self.softmax_scale = self.head_size**-0.5

        if self.num_heads % weights.process_group.size() != 0:
            raise ValueError(
                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
                f"and `num_shards`: {weights.process_group.size()}"
            )
        self.num_heads = self.num_heads // weights.process_group.size()
        self.num_key_value_heads = (
            config.num_key_value_heads // weights.process_group.size()
        )

        self.query_key_value = load_attention(config, prefix, weights)

        self.o_proj = TensorParallelRowLinear.load(
            config,
            prefix=f"{prefix}.o_proj",
            weights=weights,
            bias=False,
        )
        self.num_groups = self.num_heads // self.num_key_value_heads
        self.kv_head_mapping = torch.arange(
            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
        ).repeat_interleave(self.num_groups)

    def forward(
        self,
        hidden_states,
        cos,
        sin,
        cu_seqlen_prefill,
        kv_cache,
        block_tables,
        slots,
        input_lengths,
        max_s,
        prefill_cache_indices,
    ):
        qkv = self.query_key_value(hidden_states)
        query, kv = qkv.split(
            [
                self.head_size * self.num_heads,
                2 * self.head_size * self.num_key_value_heads,
            ],
            dim=1,
        )
        query = query.view(-1, self.num_heads, self.head_size)
        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)

        self.rotary_emb(query, cos, sin)
        self.rotary_emb(torch.select(kv, dim=1, index=0), cos, sin)

        if prefill_cache_indices is not None:
            kv_to_cache = kv[prefill_cache_indices]
        else:
            kv_to_cache = kv

        vllm_cache_ops.reshape_and_cache(
            kv_to_cache[:, 0], kv_to_cache[:, 1], kv_cache[0], kv_cache[1], slots
        )

        # output tensor
        attn_output = torch.empty_like(query)

        # Prefill
        if cu_seqlen_prefill is not None:
            # flash attention
            attention(
                query,
                torch.select(kv, dim=1, index=0),
                torch.select(kv, dim=1, index=1),
                attn_output,
                cu_seqlen_prefill,
                max_s,
                self.softmax_scale,
                window_size_left=self.max_past,
            )
        # Decode
        else:
            # kv_cache[1] => [num_blocks, num_heads, head_size, block_size]
            block_size = kv_cache[1].shape[3]
            vllm_attention_ops.single_query_cached_kv_attention(
                attn_output,
                query,
                kv_cache[0],
                kv_cache[1],
                self.kv_head_mapping,
                self.softmax_scale,
                block_tables,
                input_lengths,
                block_size,
                max_s,
            )

        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
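
A small sketch of the `kv_head_mapping` built above: with grouped-query attention each KV head serves `num_groups` query heads, and `repeat_interleave` expands the KV head indices so the paged-attention kernel can look up one KV head per query head. The head counts here are illustrative only.

    import torch

    num_heads, num_key_value_heads = 8, 2          # illustrative, not a real config
    num_groups = num_heads // num_key_value_heads  # 4 query heads share each KV head
    kv_head_mapping = torch.arange(
        0, num_key_value_heads, dtype=torch.int32
    ).repeat_interleave(num_groups)
    # tensor([0, 0, 0, 0, 1, 1, 1, 1], dtype=torch.int32)
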
class MistralMLP(nn.Module):
    def __init__(self, prefix, config, weights):
        super().__init__()
        act = config.hidden_act
        self.act = (
            ACT2FN[act]
            if "gelu" not in act
            else lambda x: torch.nn.functional.gelu(
                x,
                approximate="tanh"
                if act in ["gelu_fast", "gelu_pytorch_tanh"]
                else "none",
            )
        )
        # Fuse gate and up proj
        self.gate_up_proj = TensorParallelColumnLinear.load_multi(
            config,
            prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"],
            weights=weights,
            dim=0,
            bias=False,
        )
        self.down_proj = TensorParallelRowLinear.load(
            config,
            prefix=f"{prefix}.down_proj",
            weights=weights,
            bias=False,
        )
        self.intermediate_size = (
            config.intermediate_size // weights.process_group.size()
        )

    def forward(self, hidden_states):
        gate_up_states = self.gate_up_proj(hidden_states)
        gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size)
        return self.down_proj(self.act(gate_up_states[:, 0]) * gate_up_states[:, 1])
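
The fused `gate_up_proj` concatenates the gate and up projections along dim 0 so a single matmul produces both halves; `forward` then applies the gated activation elementwise (SiLU in Mistral's case). A minimal sketch against an unfused reference, with made-up sizes:

    import torch

    hidden, intermediate = 16, 32                # illustrative sizes
    x = torch.randn(4, hidden)
    w_gate = torch.randn(intermediate, hidden)
    w_up = torch.randn(intermediate, hidden)
    w_fused = torch.cat([w_gate, w_up], dim=0)   # what load_multi assembles along dim=0
    gate_up = (x @ w_fused.T).view(-1, 2, intermediate)
    out = torch.nn.functional.silu(gate_up[:, 0]) * gate_up[:, 1]
    ref = torch.nn.functional.silu(x @ w_gate.T) * (x @ w_up.T)
    assert torch.allclose(out, ref, atol=1e-5)
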
class MistralLayer(nn.Module):
    def __init__(self, layer_id, config, weights):
        super().__init__()
        prefix = f"model.layers.{layer_id}"
        self.self_attn = MistralAttention(
            prefix=f"{prefix}.self_attn", config=config, weights=weights
        )
        self.mlp = MistralMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)

        self.input_layernorm = MistralRMSNorm(
            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
        )
        self.post_attention_layernorm = MistralRMSNorm(
            prefix=f"{prefix}.post_attention_layernorm",
            weights=weights,
            eps=config.rms_norm_eps,
        )

    def forward(
        self,
        hidden_states,
        residual,
        cos,
        sin,
        cu_seqlen_prefill,
        kv_cache,
        block_tables,
        slots,
        input_lengths,
        max_s,
        prefill_cache_indices,
    ):
        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)

        # Self Attention
        attn_output = self.self_attn(
            normed_hidden_states,
            cos,
            sin,
            cu_seqlen_prefill,
            kv_cache,
            block_tables,
            slots,
            input_lengths,
            max_s,
            prefill_cache_indices,
        )

        # faster post attention rms norm
        normed_attn_res_output, attn_res = self.post_attention_layernorm(
            attn_output, res
        )

        mlp_output = self.mlp(normed_attn_res_output)

        return mlp_output, attn_res


class MistralModel(torch.nn.Module):
    def __init__(self, config, weights):
        super().__init__()

        process_group = weights.process_group
        self.tp_rank = process_group.rank()
        self.tp_world_size = process_group.size()
        self.embed_tokens = TensorParallelEmbedding(
            prefix="model.embed_tokens", weights=weights
        )
        self.layers = nn.ModuleList(
            [
                MistralLayer(
                    layer_id,
                    config,
                    weights,
                )
                for layer_id in range(config.num_hidden_layers)
            ]
        )
        self.norm = MistralRMSNorm(
            prefix="model.norm", weights=weights, eps=config.rms_norm_eps
        )

        self.gradient_checkpointing = False

        self.head_size = self.layers[0].self_attn.head_size
        self.num_heads = self.layers[0].self_attn.num_heads
        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads

    def forward(
        self,
        input_ids: torch.Tensor,
        position_ids: torch.Tensor,
        cu_seqlen_prefill: Optional[torch.Tensor],
        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
        block_tables: torch.Tensor,
        slots: torch.Tensor,
        input_lengths: torch.Tensor,
        max_s: int,
        prefill_cache_indices: Optional[torch.Tensor],
    ) -> torch.Tensor:
        hidden_states = self.embed_tokens(input_ids)

        # Get rotary cos and sin for this forward
        # Avoid to index in each layer
        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
            position_ids, max_s, hidden_states.dtype
        )

        residual = None
        for i, layer in enumerate(self.layers):
            hidden_states, residual = layer(
                hidden_states,
                residual,
                cos,
                sin,
                cu_seqlen_prefill,
                kv_cache[i],
                block_tables,
                slots,
                input_lengths,
                max_s,
                prefill_cache_indices,
            )

        hidden_states, _ = self.norm(hidden_states, residual)

        return hidden_states
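
The cos/sin tables are computed once per forward and shared by every layer, avoiding a per-layer gather. A hedged sketch of the rotary lookup this relies on, using the standard RoPE formulation rather than the exact `PositionRotaryEmbedding` internals (which this sketch only approximates):

    import torch

    head_size, max_s, base = 64, 128, 10000.0    # illustrative values
    inv_freq = 1.0 / (base ** (torch.arange(0, head_size, 2).float() / head_size))
    freqs = torch.outer(torch.arange(max_s).float(), inv_freq)  # [max_s, head_size // 2]
    position_ids = torch.tensor([0, 1, 2, 5])    # one gather, reused by all layers
    cos, sin = freqs.cos()[position_ids], freqs.sin()[position_ids]
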
class FlashMistralForCausalLM(torch.nn.Module):
    def __init__(self, config, weights):
        super().__init__()

        self.model = MistralModel(config, weights)
        self.lm_head = TensorParallelHead.load(
            config,
            prefix="lm_head",
            weights=weights,
        )
        self.max_past = config.sliding_window
        if self.max_past is None:
            raise ValueError("max_past cannot be None")

    def forward(
        self,
        input_ids: torch.Tensor,
        position_ids: torch.Tensor,
        cu_seqlen_prefill: Optional[torch.Tensor],
        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
        block_tables: torch.Tensor,
        slots: torch.Tensor,
        input_lengths: torch.Tensor,
        max_s: int,
        prefill_cache_indices: Optional[torch.Tensor],
        lm_head_indices: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        if prefill_cache_indices is not None:
            # Slots also need to be sliced as it has the same size as the whole kv tensor
            slots = slots[prefill_cache_indices]
        else:
            # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
            # kernel requires the true values
            max_s = min(self.max_past, max_s)
            input_lengths = torch.clamp(input_lengths, max=self.max_past)

        hidden_states = self.model(
            input_ids,
            position_ids,
            cu_seqlen_prefill,
            kv_cache,
            block_tables,
            slots,
            input_lengths,
            max_s,
            prefill_cache_indices,
        )
        if lm_head_indices is not None:
            hidden_states = hidden_states[lm_head_indices]
        logits = self.lm_head(hidden_states)
        return logits
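
Why decode mode clamps: with a sliding window the paged KV cache never holds more than `sliding_window` tokens per sequence, so the paged-attention kernel must see lengths clamped to the window even when the true prompt is longer. A worked example with an illustrative window size:

    import torch

    sliding_window = 4096                        # illustrative window size
    input_lengths = torch.tensor([1000, 5000, 9000])
    clamped = torch.clamp(input_lengths, max=sliding_window)
    # tensor([1000, 4096, 4096]) -- at most one window of KV entries is attended to
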
@@ -20,7 +20,12 @@ import numpy as np
 from PIL import Image
 
 from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
-from transformers.image_transforms import resize, to_channel_dimension_format, rescale, normalize
+from transformers.image_transforms import (
+    resize,
+    to_channel_dimension_format,
+    rescale,
+    normalize,
+)
 from transformers.image_utils import (
     ChannelDimension,
     ImageInput,
@@ -121,7 +126,11 @@ class IdeficsImageProcessor(BaseImageProcessor):
             a PyTorch tensor of the processed images
         """
         image_size = image_size if image_size is not None else self.image_size
-        image_num_channels = image_num_channels if image_num_channels is not None else self.image_num_channels
+        image_num_channels = (
+            image_num_channels
+            if image_num_channels is not None
+            else self.image_num_channels
+        )
         image_mean = image_mean if image_mean is not None else self.image_mean
         image_std = image_std if image_std is not None else self.image_std
         size = (image_size, image_size)
@@ -160,9 +169,13 @@ class IdeficsImageProcessor(BaseImageProcessor):
         images = [resize(x, size, resample=PILImageResampling.BICUBIC) for x in images]
         images = [self.rescale(image=image, scale=1 / 255) for image in images]
         images = [self.normalize(x, mean=image_mean, std=image_std) for x in images]
-        images = [to_channel_dimension_format(x, ChannelDimension.FIRST) for x in images]
+        images = [
+            to_channel_dimension_format(x, ChannelDimension.FIRST) for x in images
+        ]
         # TODO: this converts to torch tensors - switch to convert_to_tensors once it becomes available
-        images = BatchFeature(data={"pixel_values": images}, tensor_type=TensorType.PYTORCH)["pixel_values"]
+        images = BatchFeature(
+            data={"pixel_values": images}, tensor_type=TensorType.PYTORCH
+        )["pixel_values"]
 
         return images
 
@@ -185,7 +198,9 @@ class IdeficsImageProcessor(BaseImageProcessor):
             response.raise_for_status()
             return Image.open(BytesIO(response.content))
         else:
-            raise ValueError(f"only a single or a list of entries is supported but got type={type(image_url_or_urls)}")
+            raise ValueError(
+                f"only a single or a list of entries is supported but got type={type(image_url_or_urls)}"
+            )
 
     def rescale(
         self,
@@ -255,10 +270,9 @@ class IdeficsImageProcessor(BaseImageProcessor):
         `np.ndarray`: The normalized image.
         """
         # TODO 4.32
-        return normalize(
-            image, mean=mean, std=std, data_format=data_format, **kwargs
-        )
+        return normalize(image, mean=mean, std=std, data_format=data_format, **kwargs)
 
 
 import transformers
 
 transformers.IdeficsImageProcessor = IdeficsImageProcessor
@@ -28,7 +28,11 @@ from torch.nn import CrossEntropyLoss
 
 from transformers import PreTrainedModel
 from transformers.activations import ACT2FN
-from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, dataclass
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPast,
+    CausalLMOutputWithPast,
+    dataclass,
+)
 from transformers.modeling_utils import PretrainedConfig
 from transformers.utils import (
     add_start_docstrings,
@@ -37,8 +41,12 @@ from transformers.utils import (
     replace_return_docstrings,
 )
 from text_generation_server.models.custom_modeling.idefics_config import IdeficsConfig
-from text_generation_server.models.custom_modeling.idefics_vision import IdeficsVisionTransformer
-from text_generation_server.models.custom_modeling.idefics_perceiver import IdeficsPerceiverResampler
+from text_generation_server.models.custom_modeling.idefics_vision import (
+    IdeficsVisionTransformer,
+)
+from text_generation_server.models.custom_modeling.idefics_perceiver import (
+    IdeficsPerceiverResampler,
+)
 from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
@@ -49,10 +57,12 @@ from text_generation_server.utils.layers import (
 )
 import dropout_layer_norm
 
+
 @dataclass
 class BaseModelOutputWithPastImage(BaseModelOutputWithPast):
     image_hidden_states: Optional[torch.FloatTensor] = None
 
+
 @dataclass
 class CausalLMOutputWithPastImage(CausalLMOutputWithPast):
     image_hidden_states: Optional[torch.FloatTensor] = None
@@ -78,25 +88,39 @@ def expand_inputs_for_generation(
     **model_kwargs,
 ):
     expanded_return_idx = (
-        torch.arange(input_ids.shape[0]).view(-1, 1).repeat(1, expand_size).view(-1).to(input_ids.device)
+        torch.arange(input_ids.shape[0])
+        .view(-1, 1)
+        .repeat(1, expand_size)
+        .view(-1)
+        .to(input_ids.device)
     )
     input_ids = input_ids.index_select(0, expanded_return_idx)
 
     if "token_type_ids" in model_kwargs:
         token_type_ids = model_kwargs["token_type_ids"]
-        model_kwargs["token_type_ids"] = token_type_ids.index_select(0, expanded_return_idx)
+        model_kwargs["token_type_ids"] = token_type_ids.index_select(
+            0, expanded_return_idx
+        )
 
     if attention_mask is not None:
-        model_kwargs["attention_mask"] = attention_mask.index_select(0, expanded_return_idx)
-        model_kwargs["image_attention_mask"] = model_kwargs["image_attention_mask"].index_select(
+        model_kwargs["attention_mask"] = attention_mask.index_select(
+            0, expanded_return_idx
+        )
+        model_kwargs["image_attention_mask"] = model_kwargs[
+            "image_attention_mask"
+        ].index_select(0, expanded_return_idx)
+        model_kwargs["pixel_values"] = model_kwargs["pixel_values"].index_select(
             0, expanded_return_idx
         )
-        model_kwargs["pixel_values"] = model_kwargs["pixel_values"].index_select(0, expanded_return_idx)
 
     if is_encoder_decoder:
         if encoder_outputs is None:
-            raise ValueError("If `is_encoder_decoder` is True, make sure that `encoder_outputs` is defined.")
-        encoder_outputs["last_hidden_state"] = encoder_outputs.last_hidden_state.index_select(
+            raise ValueError(
+                "If `is_encoder_decoder` is True, make sure that `encoder_outputs` is defined."
+            )
+        encoder_outputs[
+            "last_hidden_state"
+        ] = encoder_outputs.last_hidden_state.index_select(
            0, expanded_return_idx.to(encoder_outputs.last_hidden_state.device)
        )
         model_kwargs["encoder_outputs"] = encoder_outputs
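
The `index_select` pattern in the hunk above duplicates every batch entry `expand_size` times, so beam search or `num_return_sequences > 1` sees per-beam copies of the token, image and mask tensors. An illustrative sketch of the index construction:

    import torch

    input_ids = torch.tensor([[1, 2], [3, 4]])   # batch of 2, illustrative
    expand_size = 3
    idx = (
        torch.arange(input_ids.shape[0]).view(-1, 1).repeat(1, expand_size).view(-1)
    )
    # idx == tensor([0, 0, 0, 1, 1, 1])
    expanded = input_ids.index_select(0, idx)    # shape [6, 2]
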
@@ -120,14 +144,17 @@ def update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder
     # update token_type_ids with last value
     if "token_type_ids" in model_kwargs:
         token_type_ids = model_kwargs["token_type_ids"]
-        model_kwargs["token_type_ids"] = torch.cat([token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1)
+        model_kwargs["token_type_ids"] = torch.cat(
+            [token_type_ids, token_type_ids[:, -1].unsqueeze(-1)], dim=-1
+        )
 
     # update attention masks
     if not is_encoder_decoder:
         if "attention_mask" in model_kwargs:
             attention_mask = model_kwargs["attention_mask"]
             model_kwargs["attention_mask"] = torch.cat(
-                [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
+                [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))],
+                dim=-1,
             )
         if "image_attention_mask" in model_kwargs:
             image_attention_mask = model_kwargs["image_attention_mask"]
@@ -180,8 +207,12 @@ def freeze_model(model, module_exceptions=[]):
     }
     module_exceptions_mapped = [mapping[m] for m in module_exceptions]
     for module in model.modules():
-        if module_exceptions and any([isinstance(module, t) for t in module_exceptions_mapped]):
-            module.requires_grad_(True)  # Explicitely setting it to true to avoid any mistakes
+        if module_exceptions and any(
+            [isinstance(module, t) for t in module_exceptions_mapped]
+        ):
+            module.requires_grad_(
+                True
+            )  # Explicitely setting it to true to avoid any mistakes
         else:
             module.requires_grad_(False)
     return model
@@ -195,15 +226,21 @@ class IdeficsDecoupledPartialTPEmbedding(nn.Module):
     ):
         super().__init__()
         self.num_embeddings = config.vocab_size
-        self.weight = TensorParallelEmbedding(prefix="model.embed_tokens", weights=weights)
-        self.additional_weight = nn.Parameter(weights.get_tensor(f"model.embed_tokens.additional_embedding.weight"))
+        self.weight = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.additional_weight = nn.Parameter(
+            weights.get_tensor(f"model.embed_tokens.additional_embedding.weight")
+        )
 
     def forward(self, input_ids):
         # Clone so that we don't modify the original input_ids later on
         input_ids = input_ids.clone()
         additional_vocab_indices = torch.where(input_ids >= self.num_embeddings)
         input_ids_additional_vocab = input_ids[additional_vocab_indices]
-        additional_embeddings = torch.nn.functional.embedding(input_ids_additional_vocab - self.num_embeddings, self.additional_weight)
+        additional_embeddings = torch.nn.functional.embedding(
+            input_ids_additional_vocab - self.num_embeddings, self.additional_weight
+        )
 
         # for successful lookup replace input_ids with 0, the results of these will be discarded anyway
         input_ids[additional_vocab_indices] = 0
@@ -234,7 +271,10 @@ class IdeficsDecoupledTensorParallelLinear(nn.Module):
             config=config, prefix="lm_head", weights=weights
         )
         self.additional_fc = FastLinear.load(
-            config=config, prefix="lm_head.additional_fc", weights=weights, bias=False,
+            config=config,
+            prefix="lm_head.additional_fc",
+            weights=weights,
+            bias=False,
         )
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
@@ -257,7 +297,10 @@ class IdeficsDecoupledTensorParallelLinear(nn.Module):
 
 # Copied from transformers.models.bart.modeling_bart._make_causal_mask
 def _make_causal_mask(
-    input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
+    input_ids_shape: torch.Size,
+    dtype: torch.dtype,
+    device: torch.device,
+    past_key_values_length: int = 0,
 ):
     """
     Make causal mask used for bi-directional self-attention.
@@ -269,8 +312,18 @@ def _make_causal_mask(
     mask = mask.to(dtype)
 
     if past_key_values_length > 0:
-        mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1)
-    return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)
+        mask = torch.cat(
+            [
+                torch.zeros(
+                    tgt_len, past_key_values_length, dtype=dtype, device=device
+                ),
+                mask,
+            ],
+            dim=-1,
+        )
+    return mask[None, None, :, :].expand(
+        bsz, 1, tgt_len, tgt_len + past_key_values_length
+    )
 
 
 def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None):
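
`_make_causal_mask` (reformatted in the hunk above) builds a lower-triangular additive mask and, when a KV cache is present, left-pads it with zeros so cached positions remain visible to every new token. A minimal worked example with illustrative sizes:

    import torch

    tgt_len, past = 3, 2
    mask = torch.full((tgt_len, tgt_len), torch.finfo(torch.float32).min)
    cond = torch.arange(tgt_len)
    mask.masked_fill_(cond < (cond + 1).view(tgt_len, 1), 0.0)  # zeros on/below diagonal
    mask = torch.cat([torch.zeros(tgt_len, past), mask], dim=-1)
    # row i attends to the 2 cached tokens plus new positions <= i
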
@@ -284,7 +337,9 @@ def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int]
 
     inverted_mask = 1.0 - expanded_mask
 
-    return inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(dtype).min)
+    return inverted_mask.masked_fill(
+        inverted_mask.to(torch.bool), torch.finfo(dtype).min
+    )
 
 
 class IdeficsRMSNorm(nn.Module):
@@ -346,7 +401,6 @@ class IdeficsRMSNorm(nn.Module):
         if unwrap:
             normed_hidden_states = normed_hidden_states.view(*shape)
 
-
         return normed_hidden_states
 
 
@@ -367,7 +421,10 @@ class IdeficsMLP(nn.Module):
             bias=False,
         )
         self.down_proj = TensorParallelRowLinear.load(
-            config, prefix=f"{prefix}.down_proj", weights=weights, bias=False,
+            config,
+            prefix=f"{prefix}.down_proj",
+            weights=weights,
+            bias=False,
         )
         self.act_fn = ACT2FN[config.hidden_act]
 
@@ -375,7 +432,9 @@ class IdeficsMLP(nn.Module):
         gate_up_states = self.gate_up_proj(hidden_states)
         shape = gate_up_states.shape
         gate_up_states = gate_up_states.view(*shape[:-1], 2, shape[-1] // 2)
-        return self.down_proj(self.act_fn(gate_up_states[:, :, 0]) * gate_up_states[:, :, 1])
+        return self.down_proj(
+            self.act_fn(gate_up_states[:, :, 0]) * gate_up_states[:, :, 1]
+        )
 
 
 # this was adapted from LlamaAttention
@@ -445,14 +504,22 @@ class IdeficsAttention(nn.Module):
         self.qk_layer_norms = qk_layer_norms
         if self.qk_layer_norms:
             self.q_layer_norm = IdeficsRMSNorm(
-                prefix=f"{prefix}.q_layer_norm", weights=weights, eps=config.rms_norm_eps
+                prefix=f"{prefix}.q_layer_norm",
+                weights=weights,
+                eps=config.rms_norm_eps,
             )
             self.k_layer_norm = IdeficsRMSNorm(
-                prefix=f"{prefix}.q_layer_norm", weights=weights, eps=config.rms_norm_eps
+                prefix=f"{prefix}.q_layer_norm",
+                weights=weights,
+                eps=config.rms_norm_eps,
             )
 
     def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
-        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+        return (
+            tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
+            .transpose(1, 2)
+            .contiguous()
+        )
 
     def forward(
         self,
@@ -470,20 +537,42 @@ class IdeficsAttention(nn.Module):
         bsz, q_len, _ = hidden_states.size()
 
         if is_cross_attention:
-            query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim)# .transpose(1, 2)
+            query_states = self.q_proj(hidden_states).view(
+                bsz, q_len, self.num_heads, self.head_dim
+            )  # .transpose(1, 2)
             query_states = query_states.transpose(1, 2)
-            _, kv_len, _ = key_value_states.size()  # Note that, in this case, `kv_len` == `kv_seq_len`
-            key_states = self.k_proj(key_value_states).view(bsz, kv_len, self.num_heads, self.head_dim).transpose(1, 2)
+            (
+                _,
+                kv_len,
+                _,
+            ) = (
+                key_value_states.size()
+            )  # Note that, in this case, `kv_len` == `kv_seq_len`
+            key_states = (
+                self.k_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
+            )
             value_states = (
-                self.v_proj(key_value_states).view(bsz, kv_len, self.num_heads, self.head_dim).transpose(1, 2)
+                self.v_proj(key_value_states)
+                .view(bsz, kv_len, self.num_heads, self.head_dim)
+                .transpose(1, 2)
             )
         else:
             qkv = self.qkv(hidden_states)
-            query_states, key_states, value_states = qkv.split(self.num_heads * self.head_dim, dim=2)
+            query_states, key_states, value_states = qkv.split(
+                self.num_heads * self.head_dim, dim=2
+            )
 
-            query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim)# .transpose(1, 2)
-            key_states = key_states.view(bsz, q_len, self.num_heads, self.head_dim)# . transpose(1, 2)
-            value_states = value_states.view(bsz, q_len, self.num_heads, self.head_dim)# .transpose(1, 2)
+            query_states = query_states.view(
+                bsz, q_len, self.num_heads, self.head_dim
+            )  # .transpose(1, 2)
+            key_states = key_states.view(
+                bsz, q_len, self.num_heads, self.head_dim
+            )  # . transpose(1, 2)
+            value_states = value_states.view(
+                bsz, q_len, self.num_heads, self.head_dim
+            )  # .transpose(1, 2)
         kv_seq_len = q_len
         if past_key_value is not None:
             kv_seq_len += past_key_value[0].shape[-2]
@@ -493,10 +582,14 @@ class IdeficsAttention(nn.Module):
         )
 
         shape = query_states.shape
-        query_states = self.rotary_emb(query_states.view(-1, *shape[2:]), cos, sin).view(shape)
+        query_states = self.rotary_emb(
+            query_states.view(-1, *shape[2:]), cos, sin
+        ).view(shape)
 
         shape = key_states.shape
-        key_states = self.rotary_emb(key_states.reshape(-1, *shape[2:]), cos, sin).view(shape)
+        key_states = self.rotary_emb(
+            key_states.reshape(-1, *shape[2:]), cos, sin
+        ).view(shape)
 
         query_states = query_states.transpose(1, 2)
         key_states = key_states.transpose(1, 2)
@@ -571,8 +664,14 @@ class IdeficsDecoderLayer(nn.Module):
             prefix=f"{prefix}.mlp",
             weights=weights,
         )
-        self.input_layernorm = IdeficsRMSNorm(prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps)
-        self.post_attention_layernorm = IdeficsRMSNorm(prefix=f"{prefix}.post_attention_layernorm", weights=weights, eps=config.rms_norm_eps)
+        self.input_layernorm = IdeficsRMSNorm(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
+        )
+        self.post_attention_layernorm = IdeficsRMSNorm(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
         self.dropout = config.dropout
 
     def forward(
@@ -583,7 +682,9 @@ class IdeficsDecoderLayer(nn.Module):
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         output_attentions: Optional[bool] = False,
         use_cache: Optional[bool] = False,
-    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+    ) -> Tuple[
+        torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]
+    ]:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@@ -650,14 +751,22 @@ class IdeficsGatedCrossAttentionLayer(nn.Module):
             prefix=f"{prefix}.mlp",
             weights=weights,
         )
-        self.input_layernorm = IdeficsRMSNorm(prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps)
-        self.post_attention_layernorm = IdeficsRMSNorm(prefix=f"{prefix}.post_attention_layernorm", weights=weights, eps=config.rms_norm_eps)
+        self.input_layernorm = IdeficsRMSNorm(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
+        )
+        self.post_attention_layernorm = IdeficsRMSNorm(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
         self.config = config.dropout
 
         self.act_cross_attn = nn.Tanh()
         self.act_dense = nn.Tanh()
 
-        self.alpha_cross_attn = nn.Parameter(weights.get_tensor(f"{prefix}.alpha_cross_attn"))
+        self.alpha_cross_attn = nn.Parameter(
+            weights.get_tensor(f"{prefix}.alpha_cross_attn")
+        )
         self.alpha_dense = nn.Parameter(weights.get_tensor(f"{prefix}.alpha_dense"))
 
         if not (hasattr(self, "alpha_cross_attn") and hasattr(self, "alpha_dense")):
@@ -673,7 +782,9 @@ class IdeficsGatedCrossAttentionLayer(nn.Module):
         use_cache: Optional[bool] = False,
         past_key_value: Optional[Tuple[torch.Tensor]] = None,
         no_images: Optional[bool] = False,
-    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+    ) -> Tuple[
+        torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]
+    ]:
         """
         Args:
             hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
@@ -695,7 +806,9 @@ class IdeficsGatedCrossAttentionLayer(nn.Module):
         )
 
         if past_key_value is not None:
-            raise NotImplementedError("Past key value states are not implemented for Idefics cross attention module.")
+            raise NotImplementedError(
+                "Past key value states are not implemented for Idefics cross attention module."
+            )
 
         residual = hidden_states
 
@@ -711,7 +824,9 @@ class IdeficsGatedCrossAttentionLayer(nn.Module):
         # hidden_states = nn.functional.dropout(hidden_states, p=self.config, training=self.training)
         # when there are no images the model is used in pure language mode
        gate = 0 if no_images else 1
-        hidden_states = residual + gate * self.act_cross_attn(self.alpha_cross_attn) * hidden_states
+        hidden_states = (
+            residual + gate * self.act_cross_attn(self.alpha_cross_attn) * hidden_states
+        )
 
         # Fully Connected
         residual = hidden_states
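
The gated cross-attention residual update reformatted above multiplies the image-conditioned branch by `tanh(alpha)`; with `alpha` initialised at zero, a newly added cross-attention block starts out as the identity, and `gate = 0` disables it entirely in pure-language mode. A sketch (tensor sizes are illustrative):

    import torch

    hidden = torch.randn(1, 4, 8)
    cross_attn_out = torch.randn(1, 4, 8)
    alpha = torch.zeros(1)                        # gate parameter at init
    gate = 1                                      # 0 when the batch carries no images
    out = hidden + gate * torch.tanh(alpha) * cross_attn_out
    assert torch.equal(out, hidden)               # tanh(0) == 0: block starts as identity
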
@@ -896,11 +1011,14 @@ class IdeficsModel(IdeficsPreTrainedModel):
         self.gated_cross_attn_layers = nn.ModuleList(
             [
                 IdeficsGatedCrossAttentionLayer(layer_id, config, weights)
-                for layer_id in range(num_cross_layers)]
+                for layer_id in range(num_cross_layers)
+            ]
         )
         # self.gradient_checkpointing = False
 
-        self.norm = IdeficsRMSNorm(prefix=f"model.norm", weights=weights, eps=config.rms_norm_eps)
+        self.norm = IdeficsRMSNorm(
+            prefix=f"model.norm", weights=weights, eps=config.rms_norm_eps
+        )
 
         # self.gradient_checkpointing = False
         # Initialize weights and apply final processing
@@ -932,7 +1050,9 @@ class IdeficsModel(IdeficsPreTrainedModel):
     #     self.embed_tokens = value
 
     # Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask
-    def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
+    def _prepare_decoder_attention_mask(
+        self, attention_mask, input_shape, inputs_embeds, past_key_values_length
+    ):
         # create causal mask
         # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
         combined_attention_mask = None
@@ -946,11 +1066,13 @@ class IdeficsModel(IdeficsPreTrainedModel):
 
         if attention_mask is not None:
             # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-            expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
-                inputs_embeds.device
-            )
+            expanded_attn_mask = _expand_mask(
+                attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
+            ).to(inputs_embeds.device)
             combined_attention_mask = (
-                expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
+                expanded_attn_mask
+                if combined_attention_mask is None
+                else expanded_attn_mask + combined_attention_mask
             )
 
         return combined_attention_mask
@@ -974,23 +1096,35 @@ class IdeficsModel(IdeficsPreTrainedModel):
     ) -> Union[Tuple, BaseModelOutputWithPastImage]:
         device = input_ids.device if input_ids is not None else inputs_embeds.device
 
-        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_attentions = (
+            output_attentions
+            if output_attentions is not None
+            else self.config.output_attentions
+        )
         output_hidden_states = (
-            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
         )
         use_cache = use_cache if use_cache is not None else self.config.use_cache
 
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
 
         # retrieve input_ids and inputs_embeds
         if input_ids is not None and inputs_embeds is not None:
-            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+            raise ValueError(
+                "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time"
+            )
         elif input_ids is not None:
             batch_size, seq_length = input_ids.shape
         elif inputs_embeds is not None:
             batch_size, seq_length, _ = inputs_embeds.shape
         else:
-            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+            raise ValueError(
+                "You have to specify either decoder_input_ids or decoder_inputs_embeds"
+            )
 
         seq_length_with_past = seq_length
         past_key_values_length = 0
@@ -1006,7 +1140,10 @@ class IdeficsModel(IdeficsPreTrainedModel):
         elif position_ids is None:
             device = input_ids.device if input_ids is not None else inputs_embeds.device
             position_ids = torch.arange(
-                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+                past_key_values_length,
+                seq_length + past_key_values_length,
+                dtype=torch.long,
+                device=device,
             )
             position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
         else:
@@ -1016,29 +1153,52 @@ class IdeficsModel(IdeficsPreTrainedModel):
 
         if image_hidden_states is None:
             if pixel_values is None and image_embeddings is None:
-                raise ValueError("Either pixel_values and image_embeddings have to be not-None.")
+                raise ValueError(
+                    "Either pixel_values and image_embeddings have to be not-None."
+                )
 
             elif pixel_values is not None and image_embeddings is not None:
-                raise ValueError("You cannot specify both pixel_values and image_embeddings at the same time")
+                raise ValueError(
+                    "You cannot specify both pixel_values and image_embeddings at the same time"
+                )
 
             elif pixel_values is not None:
                 no_images = len(torch.nonzero(pixel_values)) == 0
-                pixel_values = pixel_values.to(dtype=self.dtype, device=device)  # fp16 compatibility
+                pixel_values = pixel_values.to(
+                    dtype=self.dtype, device=device
+                )  # fp16 compatibility
                 batch_size, num_images = pixel_values.shape[:2]
-                pixel_values = pixel_values.contiguous().view(batch_size * num_images, *pixel_values.shape[2:])
+                pixel_values = pixel_values.contiguous().view(
+                    batch_size * num_images, *pixel_values.shape[2:]
+                )
 
                 # Get sequence from the vision encoder
-                image_hidden_states = self.vision_model(pixel_values=pixel_values).last_hidden_state
+                image_hidden_states = self.vision_model(
+                    pixel_values=pixel_values
+                ).last_hidden_state
 
             elif image_embeddings is not None:
-                batch_size, num_images, image_seq_len, image_hidden_size = image_embeddings.size()
-                image_hidden_states = image_embeddings.to(dtype=self.dtype, device=input_ids.device)
-                image_hidden_states = image_hidden_states.view(batch_size * num_images, image_seq_len, image_hidden_size)
+                (
+                    batch_size,
+                    num_images,
+                    image_seq_len,
+                    image_hidden_size,
+                ) = image_embeddings.size()
+                image_hidden_states = image_embeddings.to(
+                    dtype=self.dtype, device=input_ids.device
+                )
+                image_hidden_states = image_hidden_states.view(
+                    batch_size * num_images, image_seq_len, image_hidden_size
+                )
 
             if self.config.use_resampler:
                 image_hidden_states = self.perceiver_resampler(image_hidden_states)
-            image_seq_len, image_hidden_size = image_hidden_states.size(1), image_hidden_states.size(2)
-            image_hidden_states = image_hidden_states.view(batch_size, num_images * image_seq_len, image_hidden_size)
+            image_seq_len, image_hidden_size = image_hidden_states.size(
+                1
+            ), image_hidden_states.size(2)
+            image_hidden_states = image_hidden_states.view(
+                batch_size, num_images * image_seq_len, image_hidden_size
+            )
         else:
             no_images = False
             num_images = pixel_values.shape[1]
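
Flattening `pixel_values` from `[batch, num_images, C, H, W]` to `[batch * num_images, C, H, W]`, as in the hunk above, lets a single vision-encoder pass embed every image in the batch before the hidden states are reshaped back. A shape-level sketch with illustrative sizes:

    import torch

    batch_size, num_images = 2, 3
    pixel_values = torch.randn(batch_size, num_images, 3, 224, 224)
    flat = pixel_values.contiguous().view(
        batch_size * num_images, *pixel_values.shape[2:]
    )
    # flat.shape == torch.Size([6, 3, 224, 224])
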
@@ -1050,7 +1210,9 @@ class IdeficsModel(IdeficsPreTrainedModel):
             text_seq_len = image_attention_mask.size(1)
             image_attention_mask = image_attention_mask.unsqueeze(-1)
             image_attention_mask = image_attention_mask.repeat(1, 1, 1, image_seq_len)
-            image_attention_mask = image_attention_mask.view(batch_size, text_seq_len, num_images * image_seq_len)
+            image_attention_mask = image_attention_mask.view(
+                batch_size, text_seq_len, num_images * image_seq_len
+            )
         image_batch_size, image_sequence_length, _ = image_hidden_states.size()
         image_hidden_shape = (image_batch_size, image_sequence_length)
         if image_attention_mask is None:
@@ -1060,7 +1222,6 @@ class IdeficsModel(IdeficsPreTrainedModel):
         # if list(image_attention_mask.shape) != [4, 1, 1024, 64]:
         #     raise ValueError(f"Image hidden_states {image_hidden_states.shape} - mask {image_attention_mask.shape} {num_images} {image_seq_len} {text_seq_len}")
 
-
         # if image_hidden_states is not None:
         # else:
         #     image_attention_mask = None
@@ -1070,10 +1231,15 @@ class IdeficsModel(IdeficsPreTrainedModel):
         # embed positions
         if attention_mask is None:
             attention_mask = torch.ones(
-                (batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
+                (batch_size, seq_length_with_past),
+                dtype=torch.bool,
+                device=inputs_embeds.device,
             )
         attention_mask = self._prepare_decoder_attention_mask(
-            attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
+            attention_mask,
+            (batch_size, seq_length),
+            inputs_embeds,
+            past_key_values_length,
         )
 
         hidden_states = inputs_embeds
@@ -1094,7 +1260,9 @@ class IdeficsModel(IdeficsPreTrainedModel):
             if output_hidden_states:
                 all_hidden_states += (hidden_states,)
 
-            past_key_value = past_key_values[idx] if past_key_values is not None else None
+            past_key_value = (
+                past_key_values[idx] if past_key_values is not None else None
+            )
 
             def vblock(
                 main_block,
@@ -1194,7 +1362,11 @@ class IdeficsModel(IdeficsPreTrainedModel):
 
         next_cache = next_decoder_cache if use_cache else None
         if not return_dict:
-            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+            return tuple(
+                v
+                for v in [hidden_states, next_cache, all_hidden_states, all_self_attns]
+                if v is not None
+            )
         return BaseModelOutputWithPastImage(
             last_hidden_state=hidden_states,
             past_key_values=next_cache,
@@ -1264,11 +1436,19 @@ class IdeficsForVisionText2Text(IdeficsPreTrainedModel):
         "Hey, are you consciours? Can you talk to me?\nI'm not consciours, but I can talk to you."
         ```"""
 
-        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+        output_attentions = (
+            output_attentions
+            if output_attentions is not None
+            else self.config.output_attentions
+        )
         output_hidden_states = (
-            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
+        )
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
         )
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
         # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
         outputs = self.model(
@@ -1298,7 +1478,7 @@ class IdeficsForVisionText2Text(IdeficsPreTrainedModel):
             past_key_values=outputs.past_key_values,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-            image_hidden_states=outputs.image_hidden_states
+            image_hidden_states=outputs.image_hidden_states,
         )
 
     def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
@@ -1316,12 +1496,20 @@ class IdeficsForVisionText2Text(IdeficsPreTrainedModel):
         return expand_inputs_for_generation(*args, **model_kwargs)
 
     @staticmethod
-    def _update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=False):
-        return update_model_kwargs_for_generation(outputs, model_kwargs, is_encoder_decoder=is_encoder_decoder)
+    def _update_model_kwargs_for_generation(
+        outputs, model_kwargs, is_encoder_decoder=False
+    ):
+        return update_model_kwargs_for_generation(
+            outputs, model_kwargs, is_encoder_decoder=is_encoder_decoder
+        )
 
     @staticmethod
     def _reorder_cache(past, beam_idx):
         reordered_past = ()
         for layer_past in past:
-            reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
+            reordered_past += (
+                tuple(
+                    past_state.index_select(0, beam_idx) for past_state in layer_past
+                ),
+            )
         return reordered_past
@@ -46,7 +46,8 @@ from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
 )
 
-EPS=1e-5
+EPS = 1e-5
 
 
 class IdeficsPerceiverResampler(nn.Module):
     def __init__(
@@ -78,7 +79,12 @@ class IdeficsPerceiverResampler(nn.Module):
 
         """
         super().__init__()
-        self.embed_dim, self.n_heads, self.head_dim, self.n_latents = embed_dim, n_heads, head_dim, n_latents
+        self.embed_dim, self.n_heads, self.head_dim, self.n_latents = (
+            embed_dim,
+            n_heads,
+            head_dim,
+            n_latents,
+        )
         self.qk_layer_norms = config.perceiver_config.qk_layer_norms_perceiver
 
         # Create Latents for Perceiver
@@ -107,14 +113,16 @@ class IdeficsPerceiverResampler(nn.Module):
                             prefix=f"{prefix}.blocks.{layer_id}.1",
                             intermediate_size=self.intermediate_dim,
                             config=config,
-                            weights=weights
+                            weights=weights,
                         ),
                     ]
                 )
                 for layer_id in range(depth)
             ]
         )
-        self.layer_norm = nn.LayerNorm.load(prefix=f"{prefix}.layer_norm", weights=weights, eps=EPS)
+        self.layer_norm = nn.LayerNorm.load(
+            prefix=f"{prefix}.layer_norm", weights=weights, eps=EPS
+        )
 
     def forward(self, context: torch.Tensor) -> torch.Tensor:
         """Resample arbitrary length context & *compress* down to self.n_latents latent embeddings"""
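
The perceiver resampler compresses an arbitrarily long image-feature sequence down to a fixed set of `n_latents` learned queries, so downstream cross-attention cost stays constant. A shape-level sketch in which a stock `MultiheadAttention` stands in for the real block stack (the sizes and single-layer structure are assumptions, not the upstream implementation):

    import torch

    n_latents, seq_len, dim = 64, 1024, 32
    latents = torch.randn(1, n_latents, dim)      # learned queries
    context = torch.randn(1, seq_len, dim)        # vision features
    attn = torch.nn.MultiheadAttention(dim, num_heads=4, batch_first=True)
    kv = torch.cat([context, latents], dim=1)     # attend to context + latents
    out, _ = attn(latents, kv, kv)
    # out.shape == torch.Size([1, 64, 32]) regardless of seq_len
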
@@ -130,25 +138,34 @@ class IdeficsPerceiverResampler(nn.Module):
 
 
 class IdeficsPerceiverAttention(nn.Module):
-    def __init__(self,
+    def __init__(
+        self,
         prefix,
         config,
         embed_dim: int,
         n_heads: int,
         head_dim: int,
         qk_layer_norms: bool,
-        weights
+        weights,
     ) -> None:
         """Perceiver Cross-Attention Module --> let long-form inputs be `context`, resampled embeddings be `latents`"""
         super().__init__()
         self.embed_dim, self.n_heads, self.head_dim = embed_dim, n_heads, head_dim
         self.qk_layer_norms = qk_layer_norms
         # Normalization & Scaling
-        self.context_layer_norm = nn.LayerNorm.load(prefix=f"{prefix}.context_layer_norm", weights=weights, eps=EPS)
-        self.latents_layer_norm = nn.LayerNorm.load(prefix=f"{prefix}.latents_layer_norm", weights=weights, eps=EPS)
+        self.context_layer_norm = nn.LayerNorm.load(
+            prefix=f"{prefix}.context_layer_norm", weights=weights, eps=EPS
+        )
+        self.latents_layer_norm = nn.LayerNorm.load(
+            prefix=f"{prefix}.latents_layer_norm", weights=weights, eps=EPS
+        )
         if self.qk_layer_norms:
-            self.q_layer_norm = nn.LayerNorm.load(prefix=f"{prefix}.q_layer_norm", weights=weights, eps=EPS)
-            self.k_layer_norm = nn.LayerNorm.load(prefix=f"{prefix}.k_layer_norm", weights=weights, eps=EPS)
+            self.q_layer_norm = nn.LayerNorm.load(
+                prefix=f"{prefix}.q_layer_norm", weights=weights, eps=EPS
+            )
+            self.k_layer_norm = nn.LayerNorm.load(
+                prefix=f"{prefix}.k_layer_norm", weights=weights, eps=EPS
+            )
 
         self.qk_scale = self.head_dim**-0.5
 
@@ -202,7 +219,12 @@ class IdeficsPerceiverAttention(nn.Module):
         # Multiheaded Self-Attention w/ stable softmax (subtract per-row max -- `amax` -- before softmax call)
         # =>> `attn` should be a 2D matrix of shape [n_latents x (context + n_latents)]
         # einsum.rearrange(x, "bsz seq (heads embed) -> bsz heads seq embed", heads=self.n_heads)
-        q, k, v = [x.reshape(batch_size, x.shape[1], self.n_heads, self.head_dim).transpose(1, 2) for x in (q, k, v)]
+        q, k, v = [
+            x.reshape(batch_size, x.shape[1], self.n_heads, self.head_dim).transpose(
+                1, 2
+            )
+            for x in (q, k, v)
+        ]

         if self.qk_layer_norms:
             q = self.q_layer_norm(q)
@@ -219,7 +241,8 @@ class IdeficsPerceiverAttention(nn.Module):


 class IdeficsMLP(nn.Module):
-    def __init__(self,
+    def __init__(
+        self,
         prefix,
         intermediate_size,
         config,
@@ -230,14 +253,22 @@ class IdeficsMLP(nn.Module):
         self.embed_dim = config.vision_config.embed_dim
         self.ln = nn.LayerNorm.load(prefix=f"{prefix}.ln", weights=weights, eps=EPS)
         self.fc = TensorParallelColumnLinear.load(
-            config=config, prefix=f"{prefix}.fc", weights=weights, bias=False,
+            config=config,
+            prefix=f"{prefix}.fc",
+            weights=weights,
+            bias=False,
         )
         self.act = nn.ReLU()
         self.c_proj = TensorParallelRowLinear.load(
-            config=config, prefix=f"{prefix}.c_proj", weights=weights, bias=False,
+            config=config,
+            prefix=f"{prefix}.c_proj",
+            weights=weights,
+            bias=False,
         )

-    def forward(self, hidden_states: Optional[Tuple[torch.FloatTensor]]) -> torch.FloatTensor:
+    def forward(
+        self, hidden_states: Optional[Tuple[torch.FloatTensor]]
+    ) -> torch.FloatTensor:
         hidden_states = self.ln(hidden_states)
         hidden_states = self.fc(hidden_states)
         hidden_states = self.act(hidden_states)
@@ -21,9 +21,16 @@ from urllib.parse import urlparse

 from transformers.feature_extraction_utils import BatchFeature
 from transformers.processing_utils import ProcessorMixin
-from transformers.tokenization_utils_base import BatchEncoding, PaddingStrategy, TextInput, TruncationStrategy
+from transformers.tokenization_utils_base import (
+    BatchEncoding,
+    PaddingStrategy,
+    TextInput,
+    TruncationStrategy,
+)
 from transformers.utils import TensorType, is_torch_available
-from text_generation_server.models.custom_modeling.idefics_image_processing import IdeficsImageProcessor
+from text_generation_server.models.custom_modeling.idefics_image_processing import (
+    IdeficsImageProcessor,
+)


 if is_torch_available():
@@ -124,7 +131,14 @@ class IdeficsProcessor(ProcessorMixin):
     image_processor_class = "IdeficsImageProcessor"
     tokenizer_class = "LlamaTokenizerFast"

-    def __init__(self, image_processor, tokenizer=None, image_size=224, add_end_of_utterance_token=None, **kwargs):
+    def __init__(
+        self,
+        image_processor,
+        tokenizer=None,
+        image_size=224,
+        add_end_of_utterance_token=None,
+        **kwargs,
+    ):
         if image_processor is None:
             raise ValueError("You need to specify an `image_processor`.")
         if tokenizer is None:
@@ -142,7 +156,8 @@ class IdeficsProcessor(ProcessorMixin):

         self.tokenizer_was_trained_with_end_of_utterance_token = (
             True
-            if "<end_of_utterance>" in self.tokenizer.special_tokens_map.get("additional_special_tokens", [])
+            if "<end_of_utterance>"
+            in self.tokenizer.special_tokens_map.get("additional_special_tokens", [])
             else False
         )

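Note on the hunk above: the reflowed condition only changes layout, not behavior. What it computes, as a standalone sketch (the token list shown is illustrative of an IDEFICS-style tokenizer, not read from this diff):

    special_tokens_map = {
        "additional_special_tokens": [
            "<fake_token_around_image>", "<image>", "<end_of_utterance>"
        ]
    }
    trained_with_eou = "<end_of_utterance>" in special_tokens_map.get(
        "additional_special_tokens", []
    )
    assert trained_with_eou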
@@ -265,7 +280,9 @@ class IdeficsProcessor(ProcessorMixin):

         # if the value isn't overriden by the user, check if the tokenizer was trained with this token and then use it
         if add_end_of_utterance_token is None:
-            add_end_of_utterance_token = self.tokenizer_was_trained_with_end_of_utterance_token
+            add_end_of_utterance_token = (
+                self.tokenizer_was_trained_with_end_of_utterance_token
+            )

         # turn non-batched prompts into batched
         if not any(isinstance(i, list) for i in prompts):
@@ -358,10 +375,14 @@ class IdeficsProcessor(ProcessorMixin):
                 current_images = images[:local_max_num_images]

                 if len(current_images) > 0:
-                    padded_image_tensor = torch.zeros(max_num_images, *current_images.size()[1:])
+                    padded_image_tensor = torch.zeros(
+                        max_num_images, *current_images.size()[1:]
+                    )
                     padded_image_tensor[: current_images.size(0)] = current_images
                 else:
-                    padded_image_tensor = torch.zeros(max_num_images, *self.default_image_dims)
+                    padded_image_tensor = torch.zeros(
+                        max_num_images, *self.default_image_dims
+                    )

                 output_images.append(padded_image_tensor)
                 output_input_ids.append(torch.tensor(padded_input_ids))
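Shape sketch for the padding above: every sample is zero-padded to max_num_images so the whole batch stacks into a single tensor (sizes below are illustrative):

    import torch

    max_num_images = 4
    current_images = torch.rand(2, 3, 224, 224)  # this prompt carries 2 images
    padded = torch.zeros(max_num_images, *current_images.size()[1:])
    padded[: current_images.size(0)] = current_images  # images first, zeros after
    assert padded.shape == (4, 3, 224, 224)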
@@ -373,14 +394,19 @@ class IdeficsProcessor(ProcessorMixin):
         output_attention_masks = torch.stack(output_attention_masks)

         if at_least_one_image:
-            image_attention_mask, _ = image_attention_mask_for_packed_input_ids(output_input_ids, self.tokenizer)
+            image_attention_mask, _ = image_attention_mask_for_packed_input_ids(
+                output_input_ids, self.tokenizer
+            )
             image_attention_mask = incremental_to_binary_attention_mask(
                 image_attention_mask, num_classes=max_num_images
             )
         else:
             # in full language mode we set the image mask to all-0s
             image_attention_mask = torch.zeros(
-                output_input_ids.shape[0], output_input_ids.shape[1], 1, dtype=torch.bool
+                output_input_ids.shape[0],
+                output_input_ids.shape[1],
+                1,
+                dtype=torch.bool,
             )

         return BatchFeature(
@@ -75,7 +75,9 @@ class IdeficsVisionEmbeddings(nn.Module):
         self.image_size = config.image_size
         self.patch_size = config.patch_size

-        self.class_embedding = nn.Parameter(weights.get_tensor(f"{prefix}.class_embedding"))
+        self.class_embedding = nn.Parameter(
+            weights.get_tensor(f"{prefix}.class_embedding")
+        )

         self.patch_embedding = nn.Conv2d.load_no_bias(
             prefix=f"{prefix}.patch_embedding",
@@ -88,17 +90,19 @@ class IdeficsVisionEmbeddings(nn.Module):

         self.num_patches = (self.image_size // self.patch_size) ** 2
         self.num_positions = self.num_patches + 1
-        # self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim)
         self.position_embedding = TensorParallelEmbedding(
             prefix="model.vision_model.embeddings.position_embedding", weights=weights
         )
-        # self.register_buffer("position_ids", torch.arange(self.num_positions).expand((1, -1)), persistent=False)
-        self.position_ids = weights.get_tensor(f"{prefix}.position_ids")
+        self.position_ids = (
+            torch.arange(self.num_positions).expand((1, -1)).to(device=weights.device)
+        )

     def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
         batch_size = pixel_values.shape[0]
         target_dtype = self.patch_embedding.weight.dtype
-        patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]
+        patch_embeds = self.patch_embedding(
+            pixel_values.to(dtype=target_dtype)
+        )  # shape = [*, width, grid, grid]
         patch_embeds = patch_embeds.flatten(2).transpose(1, 2)

         class_embeds = self.class_embedding.expand(batch_size, 1, -1)
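The position_ids change above stops reading a persisted buffer from the checkpoint and recomputes the ids, which keeps loading checkpoints that never serialized the (non-persistent) buffer. A minimal sketch of the recomputation, assuming a 224px image with 14px patches:

    import torch

    num_patches = (224 // 14) ** 2     # 256
    num_positions = num_patches + 1    # +1 for the class token
    position_ids = torch.arange(num_positions).expand((1, -1))
    assert position_ids.shape == (1, 257)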
@@ -134,7 +138,6 @@ class IdeficsVisionAttention(nn.Module):
         self.num_heads = self.num_heads // weights.process_group.size()
         self.embed_dim = self.embed_dim // weights.process_group.size()

-
         self.k_proj = TensorParallelColumnLinear.load(
             config, prefix=f"{prefix}.k_proj", weights=weights, bias=True
         )
@@ -149,7 +152,11 @@ class IdeficsVisionAttention(nn.Module):
         )

     def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
-        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
+        return (
+            tensor.view(bsz, seq_len, self.num_heads, self.head_dim)
+            .transpose(1, 2)
+            .contiguous()
+        )

     def forward(
         self,
@@ -188,7 +195,10 @@ class IdeficsVisionAttention(nn.Module):
                     f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is"
                     f" {causal_attention_mask.size()}"
                 )
-            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + causal_attention_mask
+            attn_weights = (
+                attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+                + causal_attention_mask
+            )
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         if attention_mask is not None:
@@ -196,7 +206,10 @@ class IdeficsVisionAttention(nn.Module):
                 raise ValueError(
                     f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
                 )
-            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
+            attn_weights = (
+                attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+                + attention_mask
+            )
             attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)

         attn_weights = nn.functional.softmax(attn_weights, dim=-1)
@@ -206,12 +219,18 @@ class IdeficsVisionAttention(nn.Module):
             # make sure that attn_weights keeps its gradient.
             # In order to do so, attn_weights have to reshaped
             # twice and have to be reused in the following
-            attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
-            attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
+            attn_weights_reshaped = attn_weights.view(
+                bsz, self.num_heads, tgt_len, src_len
+            )
+            attn_weights = attn_weights_reshaped.view(
+                bsz * self.num_heads, tgt_len, src_len
+            )
         else:
             attn_weights_reshaped = None

-        attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
+        attn_probs = nn.functional.dropout(
+            attn_weights, p=self.dropout, training=self.training
+        )

         attn_output = torch.bmm(attn_probs, value_states)

@@ -255,11 +274,15 @@ class IdeficsVisionEncoderLayer(nn.Module):
     def __init__(self, prefix, config, weights):
         super().__init__()
         self.embed_dim = config.hidden_size
-        self.self_attn = IdeficsVisionAttention(prefix=f"{prefix}.self_attn", config=config, weights=weights)
+        self.self_attn = IdeficsVisionAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
         self.layer_norm1 = nn.LayerNorm.load(
             prefix=f"{prefix}.layer_norm1", weights=weights, eps=config.layer_norm_eps
         )
-        self.mlp = IdeficsVisionMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+        self.mlp = IdeficsVisionMLP(
+            prefix=f"{prefix}.mlp", config=config, weights=weights
+        )
         self.layer_norm2 = nn.LayerNorm.load(
             prefix=f"{prefix}.layer_norm2", weights=weights, eps=config.layer_norm_eps
         )
@@ -320,7 +343,11 @@ class IdeficsVisionEncoder(nn.Module):
         self.config = config
         self.layers = nn.ModuleList(
             [
-                IdeficsVisionEncoderLayer(prefix=f"{prefix}.encoder.layers.{layer_id}", config=config, weights=weights)
+                IdeficsVisionEncoderLayer(
+                    prefix=f"{prefix}.encoder.layers.{layer_id}",
+                    config=config,
+                    weights=weights,
+                )
                 for layer_id in range(config.num_hidden_layers)
             ]
         )
@@ -364,11 +391,19 @@ class IdeficsVisionEncoder(nn.Module):
             return_dict (`bool`, *optional*):
                 Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
         """
-        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
-        output_hidden_states = (
-            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
-        )
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        output_attentions = (
+            output_attentions
+            if output_attentions is not None
+            else self.config.output_attentions
+        )
+        output_hidden_states = (
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
+        )
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )

         encoder_states = () if output_hidden_states else None
         all_attentions = () if output_attentions else None
@@ -408,9 +443,15 @@ class IdeficsVisionEncoder(nn.Module):
             encoder_states = encoder_states + (hidden_states,)

         if not return_dict:
-            return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None)
+            return tuple(
+                v
+                for v in [hidden_states, encoder_states, all_attentions]
+                if v is not None
+            )
         return BaseModelOutput(
-            last_hidden_state=hidden_states, hidden_states=encoder_states, attentions=all_attentions
+            last_hidden_state=hidden_states,
+            hidden_states=encoder_states,
+            attentions=all_attentions,
         )

@@ -421,13 +462,19 @@ class IdeficsVisionTransformer(nn.Module):
         self.config = config
         embed_dim = config.hidden_size

-        self.embeddings = IdeficsVisionEmbeddings(prefix=f"{prefix}.embeddings", config=config, weights=weights)
+        self.embeddings = IdeficsVisionEmbeddings(
+            prefix=f"{prefix}.embeddings", config=config, weights=weights
+        )
         self.pre_layrnorm = nn.LayerNorm.load(
             prefix=f"{prefix}.pre_layrnorm", weights=weights, eps=config.layer_norm_eps
         )
-        self.encoder = IdeficsVisionEncoder(prefix=prefix, config=config, weights=weights)
+        self.encoder = IdeficsVisionEncoder(
+            prefix=prefix, config=config, weights=weights
+        )
         self.post_layernorm = nn.LayerNorm.load(
-            prefix=f"{prefix}.post_layernorm", weights=weights, eps=config.layer_norm_eps
+            prefix=f"{prefix}.post_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
         )

         # copied from transformers.models.clip.modeling_clip.CLIPVisionTransformer.forward
@@ -442,11 +489,19 @@ class IdeficsVisionTransformer(nn.Module):
         Returns:

         """
-        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
-        output_hidden_states = (
-            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
-        )
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+        output_attentions = (
+            output_attentions
+            if output_attentions is not None
+            else self.config.output_attentions
+        )
+        output_hidden_states = (
+            output_hidden_states
+            if output_hidden_states is not None
+            else self.config.output_hidden_states
+        )
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )

         if pixel_values is None:
             raise ValueError("You have to specify pixel_values")
@@ -49,7 +49,10 @@ from text_generation_server.utils.layers import (


 CUSTOM_KERNELS_ENABLED = False
-if not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True":
+if (
+    torch.cuda.is_available()
+    and not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True"
+):
     try:
         from custom_kernels import fused_attention_cuda

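The guard above makes the fused-kernel import conditional on an actual GPU, so CPU-only installs no longer trip over a CUDA extension at import time. A reduced sketch of the pattern (module name taken from the hunk; the except clause is an assumption about the surrounding code):

    import os
    import torch

    CUSTOM_KERNELS_ENABLED = False
    if (
        torch.cuda.is_available()
        and not os.environ.get("DISABLE_CUSTOM_KERNELS", "False") == "True"
    ):
        try:
            from custom_kernels import fused_attention_cuda  # compiled extension
            CUSTOM_KERNELS_ENABLED = True
        except ImportError:
            pass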
@@ -444,14 +444,14 @@ class OPTDecoder(OPTPreTrainedModel):

         if config.word_embed_proj_dim != config.hidden_size:
             self.project_out = FastLinear.load(
-                config, prefix="model.decoder.project_out", bias=False
+                config, prefix="model.decoder.project_out", weights=weights, bias=False
             )
         else:
             self.project_out = None

         if config.word_embed_proj_dim != config.hidden_size:
             self.project_in = FastLinear.load(
-                config, prefix="model.decoder.project_in", bias=False
+                config, prefix="model.decoder.project_in", weights=weights, bias=False
             )
         else:
             self.project_in = None
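Why adding weights= matters here: FastLinear.load takes the weight store as a required argument, so the old calls raised a TypeError as soon as an OPT checkpoint with word_embed_proj_dim != hidden_size was loaded. A minimal reproduction of the failure mode (the signature below is a simplification, not the real helper):

    def fast_linear_load(config, prefix, weights, bias):
        return (prefix, bias)

    try:
        fast_linear_load({}, prefix="model.decoder.project_out", bias=False)
    except TypeError as e:
        print(e)  # missing 1 required positional argument: 'weights'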
@@ -1032,9 +1032,17 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
             embed_tokens=self.shared,
         )

-        self.lm_head = TensorParallelHead.load(
-            config, prefix="lm_head", weights=weights
-        )
+        try:
+            self.lm_head = TensorParallelHead.load(
+                config, prefix="lm_head", weights=weights
+            )
+        except RuntimeError:
+            # Some models like t5-small were saved with shared weights unlike flan
+            # Since they are declared as the same arch we have no choice but hope
+            # that this is OK instead of using a proper flag.
+            self.lm_head = TensorParallelHead.load(
+                config, prefix="shared", weights=weights
+            )

     def forward(
         self,
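The try/except above papers over checkpoints such as t5-small that tie lm_head to the shared embedding instead of saving a separate tensor. A hedged sketch of how one could detect that situation up front instead of catching the RuntimeError (helper name and approach are illustrative, not TGI API):

    from safetensors import safe_open

    def lm_head_prefix(checkpoint_path: str) -> str:
        # Fall back to the shared embedding when no dedicated lm_head exists.
        with safe_open(checkpoint_path, framework="pt") as f:
            keys = set(f.keys())
        return "lm_head" if "lm_head.weight" in keys else "shared"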
@@ -19,99 +19,17 @@ from text_generation_server.models.types import (
     GeneratedText,
     TopTokens,
 )
+from text_generation_server.models.cache_manager import (
+    get_cache_manager,
+    set_cache_manager,
+    BLOCK_SIZE,
+)
 from text_generation_server.pb import generate_pb2
 from text_generation_server.utils import StoppingCriteria, HeterogeneousNextTokenChooser
 from text_generation_server.utils.dist import MEMORY_FRACTION

 tracer = trace.get_tracer(__name__)

-BLOCK_SIZE = 16
-# Will be set in warmup
-CACHE_MANAGER: Optional["CacheManager"] = None
-
-
-class CacheManager:
-    def __init__(
-        self,
-        num_blocks: int,
-        num_layers: int,
-        num_heads: int,
-        head_size: int,
-        dtype: torch.dtype,
-        device: torch.device,
-    ):
-        self.block_size = BLOCK_SIZE
-        self.num_blocks = num_blocks
-
-        element_size = torch.tensor([], dtype=dtype).element_size()
-        x = self.block_size // element_size
-
-        self.kv_cache = [
-            (
-                torch.empty(
-                    (num_blocks, num_heads, head_size // x, self.block_size, x),
-                    dtype=dtype,
-                    device=device,
-                ),
-                torch.empty(
-                    (num_blocks, num_heads, head_size, self.block_size),
-                    dtype=dtype,
-                    device=device,
-                ),
-            )
-            for _ in range(num_layers)
-        ]
-        self.free_block_mask = torch.ones(num_blocks, dtype=torch.int32, device="cpu")
-        self.slots = torch.arange(
-            0, num_blocks * self.block_size, dtype=torch.int32
-        ).view(num_blocks, self.block_size)
-
-    def allocate(self, batch: "FlashCausalLMBatch"):
-        # Get free blocks indices by finding values in mask that are not set to 0
-        free_block_indices = self.free_block_mask.nonzero()
-        assert (
-            len(free_block_indices) >= batch.blocks
-        ), f"Out of available cache blocks: asked {batch.blocks}, only {len(free_block_indices)} free blocks"
-
-        # Slice by the number of required blocks
-        block_indices = free_block_indices[: batch.blocks]
-        block_indices = block_indices.flatten()
-
-        # Padded block tables
-        block_tables_tensor = torch.zeros(
-            (len(batch), batch.max_blocks), dtype=torch.int32
-        )
-
-        # Allocate paged attention blocks
-        cumulative_blocks = 0
-        slots = []
-        block_tables = []
-        for i, (needed_blocks, needed_slots) in enumerate(batch.needed_blocks_slots):
-            # Get allocated blocks for this sequence
-            allocated_blocks = block_indices[
-                cumulative_blocks : cumulative_blocks + needed_blocks
-            ]
-            # Get slots for the allocated blocks
-            allocated_slots = self.slots[allocated_blocks].flatten()[:needed_slots]
-
-            slots.append(allocated_slots)
-            block_tables.append(allocated_blocks.tolist())
-            block_tables_tensor[i, :needed_blocks] = allocated_blocks
-            cumulative_blocks += needed_blocks
-
-        batch.needed_blocks_slots = None
-        batch.block_tables = block_tables
-        batch.block_tables_tensor = block_tables_tensor.to(batch.input_ids.device)
-        batch.slots = torch.concat(slots).to(batch.input_ids.device)
-
-        # Allocate the required number of blocks by setting the mask to 0
-        self.free_block_mask[block_indices] = 0
-
-    def free(self, block_indices: Optional[List[int]]):
-        if block_indices is not None and block_indices:
-            # Reset mask
-            self.free_block_mask[block_indices] = 1
-
-
 @dataclass
 class FlashCausalLMBatch(Batch):
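The CacheManager deleted above now lives behind get_cache_manager()/set_cache_manager() in the new cache_manager module imported at the top of the hunk. Its core bookkeeping, reduced to a runnable sketch (block counts are illustrative; the real module also builds the per-layer kv_cache tensors and the slot mapping):

    import torch

    free_block_mask = torch.ones(8, dtype=torch.int32)  # 1 = free, 0 = in use
    needed = 3
    free_indices = free_block_mask.nonzero().flatten()
    assert len(free_indices) >= needed, "out of cache blocks"
    allocated = free_indices[:needed]
    free_block_mask[allocated] = 0  # allocate()
    free_block_mask[allocated] = 1  # free() simply resets the mask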
@@ -481,7 +399,6 @@ class FlashCausalLMBatch(Batch):

             max_blocks = max(max_blocks, len(request_block_table))

-        global CACHE_MANAGER
         block_indices_to_free = []
         # Iterate on all requests
         for i, r in enumerate(self.requests):
@@ -489,7 +406,7 @@ class FlashCausalLMBatch(Batch):
             if r.id not in requests_idx_mapping.keys():
                 block_indices_to_free.extend(self.block_tables[i])
         # Free blocks
-        CACHE_MANAGER.free(block_indices_to_free)
+        get_cache_manager().free(block_indices_to_free)
         # Needed to avoid dropping blocks when the batches will go out of scope
         self.block_tables = None

@@ -508,7 +425,7 @@ class FlashCausalLMBatch(Batch):
         # Move to GPU now that we have the whole tensor
         slot_indices = slot_indices.to(device)

-        return FlashCausalLMBatch(
+        return type(self)(
             batch_id=self.batch_id,
             requests=requests,
             requests_idx_mapping=requests_idx_mapping,
@@ -665,7 +582,7 @@ class FlashCausalLMBatch(Batch):
             b.block_tables = None
             del b

-        return FlashCausalLMBatch(
+        return cls(
             batch_id=batches[0].batch_id,
             requests=requests,
             requests_idx_mapping=requests_idx_mapping,
@@ -698,9 +615,10 @@ class FlashCausalLMBatch(Batch):

     def __del__(self):
         if self.block_tables is not None and self.block_tables:
-            global CACHE_MANAGER
             # Free blocks
-            CACHE_MANAGER.free(list(itertools.chain.from_iterable(self.block_tables)))
+            get_cache_manager().free(
+                list(itertools.chain.from_iterable(self.block_tables))
+            )

     def __len__(self):
         return len(self.requests)
@@ -718,6 +636,7 @@ class FlashCausalLM(Model):
         device: torch.device,
         rank: int = 0,
         world_size: int = 1,
+        sliding_window: Optional[int] = None,
     ):
         self.num_layers = num_layers
         self.num_kv_heads = num_kv_heads
@@ -731,6 +650,7 @@ class FlashCausalLM(Model):
             device=device,
             rank=rank,
             world_size=world_size,
+            sliding_window=sliding_window,
         )

     @property
@@ -738,15 +658,14 @@ class FlashCausalLM(Model):
         return FlashCausalLMBatch

     def warmup(self, batch: FlashCausalLMBatch):
-        global CACHE_MANAGER
-
         torch.cuda.empty_cache()
         try:
-            CACHE_MANAGER = CacheManager(
+            cache_manager = set_cache_manager(
                 batch.blocks,
                 self.num_layers,
                 self.num_kv_heads,
                 self.head_size,
+                self.sliding_window is not None,
                 self.dtype,
                 self.device,
             )
@@ -775,53 +694,36 @@ class FlashCausalLM(Model):
         num_blocks = (
             int(free_memory // total_cache_size)
             # Add batch.blocks as we allocated it above, so it is included in the peak memory.
-            + CACHE_MANAGER.num_blocks
+            + cache_manager.num_blocks
         )

-        del CACHE_MANAGER
         del batch
-        torch.cuda.empty_cache()
+        del cache_manager

-        CACHE_MANAGER = CacheManager(
+        set_cache_manager(
             num_blocks,
             self.num_layers,
             self.num_kv_heads,
             self.head_size,
+            self.sliding_window is not None,
             self.dtype,
             self.device,
         )

         return int(num_blocks * BLOCK_SIZE)

-    def decode(self, generated_ids: Union[torch.Tensor, List[int]]) -> str:
-        return self.tokenizer.decode(
-            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
-
-    def forward(
-        self,
-        input_ids: torch.Tensor,
-        position_ids: torch.Tensor,
-        cu_seqlen_prefill: Optional[torch.Tensor],
-        block_tables: torch.Tensor,
-        slots: torch.Tensor,
-        input_lengths: torch.Tensor,
-        max_s: int,
-        lm_head_indices: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
-        global CACHE_MANAGER
-
+    def forward(self, batch: FlashCausalLMBatch) -> Tuple[torch.Tensor, torch.Tensor]:
         # Model Forward
         return self.model.forward(
-            input_ids=input_ids,
-            position_ids=position_ids,
-            cu_seqlen_prefill=cu_seqlen_prefill,
-            kv_cache=CACHE_MANAGER.kv_cache,
-            block_tables=block_tables,
-            slots=slots,
-            input_lengths=input_lengths,
-            max_s=max_s,
-            lm_head_indices=lm_head_indices,
+            input_ids=batch.input_ids,
+            position_ids=batch.position_ids,
+            cu_seqlen_prefill=batch.cu_seqlen_prefill,
+            kv_cache=get_cache_manager().kv_cache,
+            block_tables=batch.block_tables_tensor,
+            slots=batch.slots[batch.slot_indices],
+            input_lengths=batch.input_lengths_tensor,
+            max_s=batch.max_seqlen,
+            lm_head_indices=batch.prefill_head_indices,
         )

     @tracer.start_as_current_span("generate_token")
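Rough arithmetic behind num_blocks in the warmup above: each block caches BLOCK_SIZE tokens of K and V for every layer, so the block budget is free memory divided by the per-block footprint (sizes below are illustrative, not measured):

    BLOCK_SIZE = 16
    num_layers, num_kv_heads, head_size = 32, 32, 128
    dtype_size = 2  # float16
    block_bytes = 2 * num_layers * num_kv_heads * head_size * BLOCK_SIZE * dtype_size
    free_memory = 20 * 1024**3  # bytes left after the warmup forward pass
    num_blocks = int(free_memory // block_bytes)
    max_supported_tokens = num_blocks * BLOCK_SIZE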
@@ -833,19 +735,19 @@ class FlashCausalLM(Model):

         if batch.needed_blocks_slots:
             # Allocate blocks to this batch
-            CACHE_MANAGER.allocate(batch)
+            block_tables, block_tables_tensor, slots = get_cache_manager().allocate(
+                batch.needed_blocks_slots,
+                batch.blocks,
+                batch.max_blocks,
+                batch.input_ids.device,
+            )
+            batch.needed_blocks_slots = None
+            batch.block_tables = block_tables
+            batch.block_tables_tensor = block_tables_tensor
+            batch.slots = slots

         try:
-            out = self.forward(
-                batch.input_ids,
-                batch.position_ids,
-                batch.cu_seqlen_prefill,
-                batch.block_tables_tensor,
-                batch.slots[batch.slot_indices],
-                batch.input_lengths_tensor,
-                batch.max_seqlen,
-                batch.prefill_head_indices,
-            )
+            out = self.forward(batch)
         except Exception as e:
             del batch
             raise e
@@ -1008,8 +910,14 @@ class FlashCausalLM(Model):
             if i % self.world_size == self.rank:
                 if stop:
                     # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids,
+                        prefix_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens
+                        - 1,
+                        read_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens,
+                        skip_special_tokens=True,
                     )
                     generated_text = GeneratedText(
                         output_text,
@@ -62,7 +62,7 @@ class FlashLlama(FlashCausalLM):

         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
         weights = Weights(filenames, device, dtype, process_group=self.process_group)
-        if config.quantize == "gptq":
+        if config.quantize in ["gptq", "awq"]:
             weights._set_gptq_params(model_id)

         model = FlashLlamaForCausalLM(config, weights)
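Context for the quantize check above: AWQ checkpoints can ride on the GPTQ parameter plumbing because both formats ship packed integer weights plus side tensors of scales and zero points. The key names below mirror common community checkpoints and are assumptions here, not taken from this diff:

    awq_like_keys = [
        "model.layers.0.self_attn.q_proj.qweight",
        "model.layers.0.self_attn.q_proj.qzeros",
        "model.layers.0.self_attn.q_proj.scales",
    ]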
357 server/text_generation_server/models/flash_mistral.py Normal file
@@ -0,0 +1,357 @@
+import math
+import torch
+import torch.distributed
+
+import numpy as np
+
+from dataclasses import dataclass
+from opentelemetry import trace
+from transformers import PreTrainedTokenizerBase
+from transformers.models.llama import LlamaTokenizerFast
+from typing import Optional, Tuple, Type
+
+from text_generation_server.pb import generate_pb2
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.flash_causal_lm import FlashCausalLMBatch, BLOCK_SIZE
+from text_generation_server.models.cache_manager import (
+    get_cache_manager,
+    set_cache_manager,
+)
+from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
+    FlashMistralForCausalLM,
+    MistralConfig,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+    HeterogeneousNextTokenChooser,
+    StoppingCriteria,
+)
+
+tracer = trace.get_tracer(__name__)
+
+# Will be set in init
+SLIDING_WINDOW: Optional[int] = None
+SLIDING_WINDOW_BLOCKS: Optional[int] = None
+
+
+# Adds windowing logic to FlashCausalLMBatch
+@dataclass
+class FlashMistralBatch(FlashCausalLMBatch):
+    # Prefill cache indices is used to slice into the kv tensor before caching it into the paged attention buffers
+    # as we only keep SLIDING_WINDOW values instead of the whole tensor
+    prefill_cache_indices: Optional[torch.Tensor] = None
+
+    @classmethod
+    def from_pb(
+        cls,
+        pb: generate_pb2.Batch,
+        tokenizer: PreTrainedTokenizerBase,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> "FlashCausalLMBatch":
+        global SLIDING_WINDOW
+        global SLIDING_WINDOW_BLOCKS
+
+        batch_inputs = []
+        max_truncation = 0
+        for r in pb.requests:
+            batch_inputs.append(r.inputs)
+            max_truncation = max(max_truncation, r.truncate)
+
+        batch_tokenized_inputs = tokenizer(
+            batch_inputs, truncation=True, max_length=max_truncation
+        )["input_ids"]
+
+        position_ids = []
+        cu_seqlen_prefill = [0]
+        needed_blocks_slots = []
+        start_slots = []
+        slot_indices = []
+        prefill_cache_indices = []
+
+        input_lengths = []
+        prefix_offsets = []
+        read_offsets = []
+        all_input_ids = []
+        requests_idx_mapping = {}
+
+        all_prefill_logprobs = True
+        no_prefill_logprobs = True
+        prefill_head_indices = []
+        prefill_next_token_indices = []
+        prefill_cu_outlens = [0]
+
+        next_token_chooser_parameters = []
+        stopping_criterias = []
+        top_n_tokens = []
+
+        # Cumulative length
+        cumulative_length = 0
+        cumulative_max_length = 0
+        prefill_out_cumulative_length = 0
+
+        blocks = 0
+        max_seqlen = 0
+        max_length = 0
+        max_blocks = 0
+
+        # Parse batch
+        for i, (r, tokenized_input) in enumerate(
+            zip(pb.requests, batch_tokenized_inputs)
+        ):
+            # request id -> idx in list mapping
+            requests_idx_mapping[r.id] = i
+
+            tokenized_input = tokenized_input[-r.truncate :]
+
+            input_length = len(tokenized_input)
+            input_lengths.append(input_length)
+
+            prefix_offsets.append(input_length - 5)
+            read_offsets.append(input_length)
+
+            all_input_ids.append(tokenized_input)
+
+            # Position ids
+            request_position_ids = torch.arange(0, input_length, dtype=torch.int32)
+            position_ids.append(request_position_ids)
+
+            # Add cumulative lengths of all previous inputs
+            cu_seqlen_prefill.append(cumulative_length + input_length)
+
+            next_token_chooser_parameters.append(r.parameters)
+
+            stopping_criteria = StoppingCriteria.from_pb(
+                r.stopping_parameters, tokenizer
+            )
+            max_new_tokens = stopping_criteria.max_new_tokens
+            stopping_criterias.append(stopping_criteria)
+            top_n_tokens.append(r.top_n_tokens)
+
+            # Paged attention
+            # Remove one as the first token des not have a past
+            total_tokens = input_length + max_new_tokens - 1
+
+            # Needed blocks can not go over SLIDING_WINDOW_BLOCKS
+            needed_blocks = min(
+                math.ceil(total_tokens / BLOCK_SIZE), SLIDING_WINDOW_BLOCKS
+            )
+            blocks += needed_blocks
+
+            needed_blocks_slots.append((needed_blocks, total_tokens))
+            start_slots.append(cumulative_max_length)
+
+            request_slot_indices = torch.arange(
+                cumulative_max_length,
+                cumulative_max_length + input_length,
+                dtype=torch.int64,
+            )
+            slot_indices.append(request_slot_indices)
+
+            # Create tensor to slice into the kv tensor in prefill
+            request_prefill_cache_indices = torch.arange(
+                cumulative_length + max(0, input_length - SLIDING_WINDOW),
+                cumulative_length + input_length,
+                dtype=torch.int64,
+            )
+            prefill_cache_indices.append(request_prefill_cache_indices)
+
+            all_prefill_logprobs = all_prefill_logprobs and r.prefill_logprobs
+            no_prefill_logprobs = no_prefill_logprobs and not r.prefill_logprobs
+
+            if r.prefill_logprobs:
+                prefill_head_indices.append(request_position_ids + cumulative_length)
+                prefill_next_token_indices.append(
+                    prefill_out_cumulative_length + input_length - 1
+                )
+                prefill_cu_outlens.append(prefill_out_cumulative_length + input_length)
+                prefill_out_cumulative_length += input_length
+            else:
+                prefill_head_indices.append(
+                    torch.tensor(
+                        [cumulative_length + input_length - 1], dtype=torch.int32
+                    )
+                )
+                prefill_next_token_indices.append(prefill_out_cumulative_length)
+                prefill_cu_outlens.append(prefill_out_cumulative_length + 1)
+                prefill_out_cumulative_length += 1
+
+            # Update
+            cumulative_length += input_length
+            cumulative_max_length += total_tokens
+            max_seqlen = max(max_seqlen, input_length)
+            max_blocks = max(max_blocks, needed_blocks)
+            max_length = max(max_length, input_length + max_new_tokens)
+
+        next_token_chooser = HeterogeneousNextTokenChooser.from_pb(
+            next_token_chooser_parameters, dtype, device
+        )
+        start_slots = torch.tensor(start_slots, dtype=torch.int64)
+
+        # Padded all_input_ids_tensor
+        all_input_ids_tensor = np.zeros(
+            (len(all_input_ids), max_length), dtype=np.int64
+        )
+        for i, input_ids in enumerate(all_input_ids):
+            all_input_ids_tensor[i, : len(input_ids)] = input_ids
+
+        # Create tensors on device
+        all_input_ids_tensor = torch.tensor(
+            all_input_ids_tensor, dtype=torch.int64, device=device
+        )
+
+        if len(pb.requests) > 1:
+            input_ids = np.concatenate(all_input_ids, dtype=np.int64)
+            position_ids = torch.cat(position_ids)
+            slot_indices = torch.cat(slot_indices)
+            prefill_cache_indices = torch.cat(prefill_cache_indices)
+        else:
+            input_ids = all_input_ids[0]
+            position_ids = position_ids[0]
+            slot_indices = slot_indices[0]
+            prefill_cache_indices = prefill_cache_indices[0]
+
+        cu_seqlen_prefill = torch.tensor(
+            cu_seqlen_prefill, device=device, dtype=torch.int32
+        )
+
+        position_ids = position_ids.to(device)
+        slot_indices = slot_indices.to(device)
+        prefill_cache_indices = prefill_cache_indices.to(device)
+        input_ids = torch.tensor(input_ids, dtype=torch.int64, device=device)
+        input_lengths_tensor = torch.tensor(
+            input_lengths, dtype=torch.int32, device=device
+        )
+
+        if all_prefill_logprobs:
+            prefill_head_indices = None
+            prefill_next_token_indices = cu_seqlen_prefill[1:] - 1
+        elif no_prefill_logprobs:
+            prefill_head_indices = cu_seqlen_prefill[1:] - 1
+            prefill_next_token_indices = None
+        else:
+            prefill_head_indices = torch.tensor(
+                torch.cat(prefill_head_indices), dtype=torch.int64, device=device
+            )
+            prefill_next_token_indices = torch.tensor(
+                prefill_next_token_indices, dtype=torch.int64, device=device
+            )
+        top_n_tokens_tensor = torch.tensor(
+            top_n_tokens, device=device, dtype=torch.int64
+        )
+
+        return cls(
+            batch_id=pb.id,
+            requests=pb.requests,
+            requests_idx_mapping=requests_idx_mapping,
+            input_ids=input_ids,
+            position_ids=position_ids,
+            cu_seqlen_prefill=cu_seqlen_prefill,
+            start_slots=start_slots,
+            slot_indices=slot_indices,
+            needed_blocks_slots=needed_blocks_slots,
+            block_tables=None,
+            block_tables_tensor=None,
+            slots=None,
+            max_seqlen=max_seqlen,
+            prefill_head_indices=prefill_head_indices,
+            prefill_next_token_indices=prefill_next_token_indices,
+            prefill_cu_outlens=prefill_cu_outlens,
+            input_lengths=input_lengths,
+            input_lengths_tensor=input_lengths_tensor,
+            prefix_offsets=prefix_offsets,
+            read_offsets=read_offsets,
+            all_input_ids=all_input_ids,
+            all_input_ids_tensor=all_input_ids_tensor,
+            next_token_chooser=next_token_chooser,
+            stopping_criterias=stopping_criterias,
+            top_n_tokens=top_n_tokens,
+            top_n_tokens_tensor=top_n_tokens_tensor,
+            blocks=blocks,
+            max_blocks=max_blocks,
+            prefill_cache_indices=prefill_cache_indices,
+        )
+
+
+class FlashMistral(FlashCausalLM):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        global SLIDING_WINDOW
+        global SLIDING_WINDOW_BLOCKS
+
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashLlama is only available on GPU")
+
+        tokenizer = LlamaTokenizerFast.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+
+        config = MistralConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+
+        # Set context windows
+        SLIDING_WINDOW = config.sliding_window
+        SLIDING_WINDOW_BLOCKS = math.ceil(config.sliding_window / BLOCK_SIZE)
+
+        torch.distributed.barrier(group=self.process_group)
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id)
+
+        model = FlashMistralForCausalLM(config, weights)
+
+        torch.distributed.barrier(group=self.process_group)
+        super(FlashMistral, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+            sliding_window=config.sliding_window,
+        )
+
+    @property
+    def batch_type(self) -> Type[FlashMistralBatch]:
+        return FlashMistralBatch
+
+    def forward(self, batch: FlashMistralBatch) -> Tuple[torch.Tensor, torch.Tensor]:
+        # Model Forward
+        logits = self.model.forward(
+            input_ids=batch.input_ids,
+            position_ids=batch.position_ids,
+            cu_seqlen_prefill=batch.cu_seqlen_prefill,
+            kv_cache=get_cache_manager().kv_cache,
+            block_tables=batch.block_tables_tensor,
+            slots=batch.slots[batch.slot_indices],
+            input_lengths=batch.input_lengths_tensor,
+            max_s=batch.max_seqlen,
+            prefill_cache_indices=batch.prefill_cache_indices,
+            lm_head_indices=batch.prefill_head_indices,
+        )
+        if batch.prefill_cache_indices is not None:
+            batch.prefill_cache_indices = None
+        return logits
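Worked example of the sliding-window cap used in FlashMistralBatch.from_pb above, assuming BLOCK_SIZE = 16 and Mistral's 4096-token window (the window size comes from the model config; the request size below is illustrative):

    import math

    BLOCK_SIZE = 16
    SLIDING_WINDOW = 4096
    SLIDING_WINDOW_BLOCKS = math.ceil(SLIDING_WINDOW / BLOCK_SIZE)  # 256

    total_tokens = 10_000  # input_length + max_new_tokens - 1
    needed_blocks = min(math.ceil(total_tokens / BLOCK_SIZE), SLIDING_WINDOW_BLOCKS)
    assert needed_blocks == 256  # never allocate past the attention window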
@@ -80,6 +80,7 @@ class GalacticaCausalLMBatch(CausalLMBatch):
         next_token_choosers = []
         stopping_criterias = []
         prefix_offsets = []
+        top_n_tokens = []
         read_offsets = []
         requests_idx_mapping = {}

@@ -96,6 +97,7 @@ class GalacticaCausalLMBatch(CausalLMBatch):
                 r.stopping_parameters, tokenizer
             )
            stopping_criterias.append(stopping_criteria)
+            top_n_tokens.append(r.top_n_tokens)
            max_truncation = max(max_truncation, r.truncate)
            max_decode_tokens += stopping_criteria.max_new_tokens
            padding_right_offset = max(
@@ -129,6 +131,9 @@ class GalacticaCausalLMBatch(CausalLMBatch):
         position_ids = tokenized_inputs["attention_mask"].long().cumsum(-1) - 1
         position_ids.masked_fill_(tokenized_inputs["attention_mask"] == 0, 1)
         all_input_ids = tokenized_inputs["input_ids"].T.split(1, dim=1)
+        top_n_tokens_tensor = torch.tensor(
+            top_n_tokens, device=device, dtype=torch.int64
+        )

         max_tokens = len(inputs) * max_input_length + max_decode_tokens

@@ -146,6 +151,8 @@ class GalacticaCausalLMBatch(CausalLMBatch):
             read_offsets=read_offsets,
             next_token_choosers=next_token_choosers,
             stopping_criterias=stopping_criterias,
+            top_n_tokens=top_n_tokens,
+            top_n_tokens_tensor=top_n_tokens_tensor,
             max_input_length=max_input_length.item(),
             padding_right_offset=padding_right_offset,
             max_tokens=max_tokens,
@@ -167,7 +174,7 @@ class GalacticaSharded(CausalLM):
             dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype

         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -33,7 +33,7 @@ class GPTNeoxSharded(CausalLM):
             dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
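This same one-line change is repeated across nearly every model class in the commit: a caller-supplied dtype used to be silently overwritten on CPU, and now it only acts as a default. The pattern in isolation (a sketch with illustrative names):

```python
from typing import Optional
import torch

def resolve_dtype(dtype: Optional[torch.dtype], on_gpu: bool) -> torch.dtype:
    # Respect an explicit dtype; otherwise fall back to the device default.
    if on_gpu:
        return torch.float16 if dtype is None else dtype
    return torch.float32 if dtype is None else dtype

assert resolve_dtype(None, on_gpu=False) == torch.float32
assert resolve_dtype(torch.bfloat16, on_gpu=False) == torch.bfloat16  # no longer clobbered
```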
@@ -42,7 +42,7 @@ class IDEFICSSharded(IdeficsCausalLM):
             dtype = torch.bfloat16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
         self.device, self.dtype = device, dtype
 
         config = IdeficsConfig.from_pretrained(
@@ -8,7 +8,13 @@ import re
 
 from dataclasses import dataclass
 from opentelemetry import trace
-from transformers import AutoProcessor, AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizerBase, ProcessorMixin
+from transformers import (
+    AutoProcessor,
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    PreTrainedTokenizerBase,
+    ProcessorMixin,
+)
 from typing import Optional, Tuple, List, Type, Dict
 
 from text_generation_server.models import Model
@@ -23,7 +29,8 @@ from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sam
 
 import re
 
-IMAGES = re.compile(r'!\[[^\]]*\]\((.*?)\s*(\"(?:.*[^\"])\")?\s*\)')
+IMAGES = re.compile(r"!\[[^\]]*\]\((.*?)\s*(\"(?:.*[^\"])\")?\s*\)")
+
 
 def split(string):
     parts = []
@@ -41,6 +48,7 @@ def split(string):
 
     return parts
 
+
 tracer = trace.get_tracer(__name__)
 
 
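The IMAGES regex above is what lets IDEFICS prompts mix text and images: split() cuts the prompt at every markdown image tag so the URL becomes its own part. A small usage sketch of the behaviour the regex suggests (the sample prompt and output are illustrative):

```python
import re

IMAGES = re.compile(r"!\[[^\]]*\]\((.*?)\s*(\"(?:.*[^\"])\")?\s*\)")

prompt = "User: ![](https://example.com/cat.png) What is in this image?"
match = IMAGES.search(prompt)
print(match.group(1))  # https://example.com/cat.png — the URL becomes an image part
```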
@@ -141,8 +149,12 @@ class IdeficsCausalLMBatch(Batch):
         ).to(device)
         for _ in pb.requests:
             input_len = tokenized_inputs["input_ids"].shape[1]
-            prefix_offsets.append(input_len - 5)  # To decode without potential fallbacks errors
-            read_offsets.append(input_len)  # To decode without potential fallbacks errors
+            prefix_offsets.append(
+                input_len - 5
+            )  # To decode without potential fallbacks errors
+            read_offsets.append(
+                input_len
+            )  # To decode without potential fallbacks errors
 
         input_lengths = tokenized_inputs["attention_mask"].sum(1)
         max_input_length = input_lengths.max()
@@ -158,14 +170,21 @@ class IdeficsCausalLMBatch(Batch):
         attention_mask[:, :max_input_length] = tokenized_inputs["attention_mask"]
         # Do the same for image_attention_mask
         image_attention_mask = input_ids.new_zeros(
-            (pb.size, max_input_length + padding_right_offset, tokenized_inputs["pixel_values"].size(1))
+            (
+                pb.size,
+                max_input_length + padding_right_offset,
+                tokenized_inputs["pixel_values"].size(1),
+            )
         )
-        image_attention_mask[:, :max_input_length, :] = tokenized_inputs["image_attention_mask"]
+        image_attention_mask[:, :max_input_length, :] = tokenized_inputs[
+            "image_attention_mask"
+        ]
 
         position_ids = tokenized_inputs["attention_mask"].long().cumsum(-1) - 1
         position_ids.masked_fill_(tokenized_inputs["attention_mask"] == 0, 1)
-        all_input_ids = tokenized_inputs["input_ids"].T.split(1, dim=1)  # It's input_ids but splitted into a tuple of tensors where each tensor is (seq_len, 1) size. It is then transformed into a list
+        all_input_ids = tokenized_inputs["input_ids"].T.split(
+            1, dim=1
+        )  # It's input_ids but splitted into a tuple of tensors where each tensor is (seq_len, 1) size. It is then transformed into a list
 
         max_tokens = len(inputs) * (max_input_length + max_decode_tokens)
 
@@ -259,7 +278,7 @@ class IdeficsCausalLMBatch(Batch):
                 self.image_attention_mask.shape[1] - self.padding_right_offset
             )
             + new_padding_right_offset,
-            :
+            :,
         ]
         if self.image_hidden_states is None:
             image_hidden_states = None
@@ -308,7 +327,9 @@ class IdeficsCausalLMBatch(Batch):
 
     @classmethod
    @tracer.start_as_current_span("concatenate")
-    def concatenate(cls, batches: List["IdeficsCausalLMBatch"]) -> "IdeficsCausalLMBatch":
+    def concatenate(
+        cls, batches: List["IdeficsCausalLMBatch"]
+    ) -> "IdeficsCausalLMBatch":
         # It adds new requests to the batch
         # Used for padding
         total_batch_size = 0
@@ -383,12 +404,20 @@ class IdeficsCausalLMBatch(Batch):
 
             curr_batch_max_num_images = batch.pixel_values.size(1)
             if pixel_values is None:
-                pixel_values = batch.pixel_values.new_zeros((total_batch_size, max_num_images, 3, 224, 224))
-            pixel_values[start_index:end_index, :curr_batch_max_num_images] = batch.pixel_values
+                pixel_values = batch.pixel_values.new_zeros(
+                    (total_batch_size, max_num_images, 3, 224, 224)
+                )
+            pixel_values[
+                start_index:end_index, :curr_batch_max_num_images
+            ] = batch.pixel_values
 
             if image_attention_mask is None:
                 image_attention_mask = batch.image_attention_mask.new_zeros(
-                    (total_batch_size, max_input_length + padding_right_offset, max_num_images)
+                    (
+                        total_batch_size,
+                        max_input_length + padding_right_offset,
+                        max_num_images,
+                    )
                 )
 
             # We need to slice the attention mask to remove padding from previous steps
@@ -409,11 +438,9 @@ class IdeficsCausalLMBatch(Batch):
             image_attention_mask[
                 start_index:end_index,
                 left_offset:-padding_right_offset,
-                :curr_batch_max_num_images
+                :curr_batch_max_num_images,
             ] = batch.image_attention_mask[
-                :,
-                batch_left_offset : - batch.padding_right_offset,
-                :
+                :, batch_left_offset : -batch.padding_right_offset, :
             ]
 
         # Create empty tensor
@@ -550,7 +577,9 @@ class IdeficsCausalLM(Model):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-        from text_generation_server.models.custom_modeling.idefics_modeling import IdeficsForVisionText2Text
+        from text_generation_server.models.custom_modeling.idefics_modeling import (
+            IdeficsForVisionText2Text,
+        )
 
         if torch.cuda.is_available():
             device = torch.device("cuda")
@@ -560,7 +589,7 @@ class IdeficsCausalLM(Model):
                 raise ValueError("quantization is not available on CPU")
 
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -611,11 +640,6 @@ class IdeficsCausalLM(Model):
     def batch_type(self) -> Type[IdeficsCausalLMBatch]:
         return IdeficsCausalLMBatch
 
-    def decode(self, generated_ids: List[int]) -> str:
-        return self.tokenizer.decode(
-            generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
-        )
-
     def forward(
         self,
         input_ids,
@@ -655,9 +679,13 @@ class IdeficsCausalLM(Model):
             # this is due to the nature IDEFICS: it's an encoder decoder, and so when decoding, only the currently generated
             # token need to attend to the encoder hidden states (i.e. the vision encoder)
             # Also see seq2seq_lm.Seq2SeqLM.generate_token which has roughly the same logic
-            image_attention_mask = batch.image_attention_mask[:, -(batch.padding_right_offset+1)].unsqueeze(1)
+            image_attention_mask = batch.image_attention_mask[
+                :, -(batch.padding_right_offset + 1)
+            ].unsqueeze(1)
         else:
-            image_attention_mask = batch.image_attention_mask[:, : -batch.padding_right_offset]
+            image_attention_mask = batch.image_attention_mask[
+                :, : -batch.padding_right_offset
+            ]
 
         logits, past, image_hidden_states = self.forward(
             input_ids=batch.input_ids,
@@ -728,8 +756,14 @@ class IdeficsCausalLM(Model):
             if i % self.world_size == self.rank:
                 if stop:
                     # Decode generated tokens
-                    output_text = self.decode(
-                        all_input_ids[-stopping_criteria.current_tokens :, 0]
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids[:, 0],
+                        prefix_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens
+                        - 1,
+                        read_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens,
+                        skip_special_tokens=True,
                     )
                     # Get seed
                     if isinstance(next_token_chooser.choice, Sampling):
@@ -763,7 +797,7 @@ class IdeficsCausalLM(Model):
                 else:
                     prefill_tokens = None
 
-                top_tokens=None
+                top_tokens = None
 
                 generation = Generation(
                     request.id,
@@ -773,7 +807,7 @@ class IdeficsCausalLM(Model):
                     next_token_text,
                     next_token_id_squeezed.item() in self.all_special_ids,
                     generated_text,
-                    top_tokens
+                    top_tokens,
                 )
 
                 generations.append(generation)
@@ -795,7 +829,9 @@ class IdeficsCausalLM(Model):
 
         # Update attention_mask as we added a new token to input_ids
         batch.attention_mask[:, -batch.padding_right_offset] = 1
-        batch.image_attention_mask[:, -batch.padding_right_offset, :] = batch.image_attention_mask[:, -(batch.padding_right_offset+1), :]
+        batch.image_attention_mask[
+            :, -batch.padding_right_offset, :
+        ] = batch.image_attention_mask[:, -(batch.padding_right_offset + 1), :]
         # Decrease right offset
         batch.padding_right_offset -= 1
 
@@ -21,6 +21,7 @@ class Model(ABC):
         device: torch.device,
         rank: int = 0,
         world_size: int = 1,
+        sliding_window: Optional[int] = None,
     ):
         self.model = model.eval()
         self.tokenizer = tokenizer
@@ -30,6 +31,7 @@ class Model(ABC):
         self.device = device
         self.rank = rank
         self.world_size = world_size
+        self.sliding_window = sliding_window
 
         self.has_position_ids = (
             inspect.signature(model.forward).parameters.get("position_ids", None)
@@ -40,10 +42,14 @@ class Model(ABC):
 
     @property
     def info(self) -> InfoResponse:
+        if self.requires_padding and self.sliding_window is not None:
+            raise NotImplementedError("sliding_window is not implemented with padding")
+
         return InfoResponse(
             requires_padding=self.requires_padding,
             dtype=str(self.dtype),
             device_type=self.device.type,
+            window_size=self.sliding_window,
         )
 
     @property
@@ -64,16 +70,18 @@ class Model(ABC):
         all_input_ids: List[int],
         prefix_offset: int = 0,
         read_offset: int = 0,
+        skip_special_tokens: bool = False,
     ) -> Tuple[str, int, int]:
         """Hack to hopefully support generate_stream for the maximum number of tokenizers"""
 
         # The prefix text is necessary only to defeat cleanup algorithms in the decode
         # which decide to add a space or not depending on the surrounding ids.
         prefix_text = self.tokenizer.decode(
-            all_input_ids[prefix_offset:read_offset], skip_special_tokens=False
+            all_input_ids[prefix_offset:read_offset],
+            skip_special_tokens=skip_special_tokens,
         )
         new_text = self.tokenizer.decode(
-            all_input_ids[prefix_offset:], skip_special_tokens=False
+            all_input_ids[prefix_offset:], skip_special_tokens=skip_special_tokens
         )
 
         if len(new_text) > len(prefix_text) and not new_text.endswith("�"):
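decode_token implements incremental detokenization: it re-decodes a small window ending at the previous read point and only emits the text that extends past it, which sidesteps tokenizers whose decode output depends on the surrounding ids. A standalone sketch of the same idea, assuming a Hugging Face tokenizer (an illustrative, trimmed-down version of the method above):

```python
from transformers import AutoTokenizer

def decode_token(tokenizer, all_input_ids, prefix_offset, read_offset):
    # Decode the already-seen window, then the window plus the new ids; the
    # difference is the freshly generated text.
    prefix_text = tokenizer.decode(all_input_ids[prefix_offset:read_offset])
    new_text = tokenizer.decode(all_input_ids[prefix_offset:])
    if len(new_text) > len(prefix_text) and not new_text.endswith("�"):
        return new_text[len(prefix_text):], read_offset, len(all_input_ids)
    # Incomplete UTF-8 sequence: emit nothing and keep the offsets for now.
    return "", prefix_offset, read_offset

tok = AutoTokenizer.from_pretrained("gpt2")
ids = tok("Hello world, how are you?")["input_ids"]
text, _, _ = decode_token(tok, ids, prefix_offset=0, read_offset=3)
print(text)  # the text contributed by ids[3:]
```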
@@ -43,14 +43,16 @@ class MPTSharded(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
-            dtype = torch.float16
+            dtype = torch.float16 if dtype is None else dtype
         else:
-            raise NotImplementedError("MPTSharded is only available on GPU")
+            device = torch.device("cpu")
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -31,7 +31,7 @@ class OPTSharded(CausalLM):
             dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -23,7 +23,7 @@ class RW(CausalLM):
                 raise ValueError("quantization is not available on CPU")
 
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -30,7 +30,7 @@ class SantaCoder(CausalLM):
                 raise ValueError("quantization is not available on CPU")
 
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -541,7 +541,7 @@ class Seq2SeqLM(Model):
                 raise ValueError("quantization is not available on CPU")
 
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         model = AutoModelForSeq2SeqLM.from_pretrained(
             model_id,
|
|||||||
batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
|
batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
|
||||||
batch.top_n_tokens,
|
batch.top_n_tokens,
|
||||||
batch.top_n_tokens_tensor,
|
batch.top_n_tokens_tensor,
|
||||||
torch.softmax(logits[:, -1], -1),
|
torch.log_softmax(logits[:, -1], -1),
|
||||||
)
|
)
|
||||||
|
|
||||||
# Finished requests
|
# Finished requests
|
||||||
@ -710,8 +710,13 @@ class Seq2SeqLM(Model):
|
|||||||
if stop:
|
if stop:
|
||||||
# Slice with decoder_input_length to remove padding
|
# Slice with decoder_input_length to remove padding
|
||||||
# Decode all tokens
|
# Decode all tokens
|
||||||
output_text = self.decode(
|
output_text, _, _ = self.decode_token(
|
||||||
all_decoder_input_ids[-decoder_input_length:]
|
all_decoder_input_ids,
|
||||||
|
prefix_offset=len(all_decoder_input_ids)
|
||||||
|
- decoder_input_length
|
||||||
|
- 1,
|
||||||
|
read_offset=len(all_decoder_input_ids) - decoder_input_length,
|
||||||
|
skip_special_tokens=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Get seed
|
# Get seed
|
||||||
|
@@ -34,7 +34,7 @@ class T5Sharded(Seq2SeqLM):
             dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
-            dtype = torch.float32
+            dtype = torch.float32 if dtype is None else dtype
 
         config = AutoConfig.from_pretrained(
             model_id,
@@ -16,6 +16,7 @@ from text_generation_server.pb import generate_pb2_grpc, generate_pb2
 from text_generation_server.tracing import UDSOpenTelemetryAioServerInterceptor
 from text_generation_server.models.idefics_causal_lm import IdeficsCausalLMBatch
 
+
 class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
     def __init__(self, model: Model, cache: Cache, server_urls: List[str]):
         self.cache = cache
@@ -26,7 +27,6 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         # Force inference mode for the lifetime of TextGenerationService
         self._inference_mode_raii_guard = torch._C._InferenceMode(True)
 
-
     async def Info(self, request, context):
         return self.model.info
 
@@ -55,9 +55,15 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         return generate_pb2.FilterBatchResponse(batch=filtered_batch.to_pb())
 
     async def Warmup(self, request, context):
-        if self.model.batch_type == IdeficsCausalLMBatch: #Hack, i would rather use kwargs in the `from_pb` call
+        if (
+            self.model.batch_type == IdeficsCausalLMBatch
+        ):  # Hack, i would rather use kwargs in the `from_pb` call
             batch = self.model.batch_type.from_pb(
-                request.batch, self.model.tokenizer, self.model.processor, self.model.dtype, self.model.device
+                request.batch,
+                self.model.tokenizer,
+                self.model.processor,
+                self.model.dtype,
+                self.model.device,
             )
         else:
             batch = self.model.batch_type.from_pb(
@@ -70,9 +76,15 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
             )
 
     async def Prefill(self, request, context):
-        if self.model.batch_type == IdeficsCausalLMBatch: #Hack, i would rather use kwargs in the `from_pb` call
+        if (
+            self.model.batch_type == IdeficsCausalLMBatch
+        ):  # Hack, i would rather use kwargs in the `from_pb` call
             batch = self.model.batch_type.from_pb(
-                request.batch, self.model.tokenizer, self.model.processor, self.model.dtype, self.model.device
+                request.batch,
+                self.model.tokenizer,
+                self.model.processor,
+                self.model.dtype,
+                self.model.device,
             )
         else:
             batch = self.model.batch_type.from_pb(
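Both Warmup and Prefill special-case IdeficsCausalLMBatch because its from_pb needs an extra processor argument; the in-code comment wishes for kwargs instead. A sketch of that alternative, with a hypothetical from_pb signature that tolerates extra keywords (not the repository's current API):

```python
# Hypothetical: let every batch type accept the same keyword arguments and
# ignore the ones it does not need, removing the per-type branch.
def make_batch(model, request):
    return model.batch_type.from_pb(
        request.batch,
        tokenizer=model.tokenizer,
        processor=getattr(model, "processor", None),  # only multimodal batches use it
        dtype=model.dtype,
        device=model.device,
    )
```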
50	server/text_generation_server/utils/awq/quantize/qmodule.py	Normal file
@@ -0,0 +1,50 @@
+# Copied logic from https://github.com/mit-han-lab/llm-awq/blob/f084f40bd996f3cf3a0633c1ad7d9d476c318aaa/awq/quantize/qmodule.py
+
+import math
+import torch
+import torch.nn as nn
+import awq_inference_engine  # with CUDA kernels
+
+
+# class ScaledActivation(nn.Module):
+#     def __init__(self, module, scales):
+#         super().__init__()
+#         self.act = module
+#         self.scales = nn.Parameter(scales.data)
+#
+#     def forward(self, x):
+#         return self.act(x) / self.scales.view(1, 1, -1).to(x.device)
+
+
+class WQLinear(nn.Module):
+    def __init__(self, w_bit, group_size, qweight, qzeros, scales, bias):
+        super().__init__()
+
+        if w_bit not in [4]:
+            raise NotImplementedError("Only 4-bit are supported for now.")
+
+        self.in_features = qweight.shape[0]
+        self.out_features = qweight.shape[1] * 32 // w_bit
+
+        self.w_bit = w_bit
+        self.group_size = group_size if group_size != -1 else self.in_features
+        # quick sanity check (make sure aligment)
+        assert self.in_features % self.group_size == 0
+        assert self.out_features % (32 // self.w_bit) == 0
+
+        self.qweight = qweight
+        self.qzeros = qzeros
+        self.scales = scales
+        if bias:
+            self.bias = bias
+        else:
+            self.bias = None
+
+    @torch.no_grad()
+    def forward(self, x):
+        out_shape = x.shape[:-1] + (self.out_features,)
+        out = awq_inference_engine.gemm_forward_cuda(
+            x.reshape(-1, x.shape[-1]), self.qweight, self.scales, self.qzeros, 8
+        )
+        out = out + self.bias if self.bias is not None else out
+        return out.reshape(out_shape)
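WQLinear stores AWQ 4-bit weights packed eight to an int32, which is where out_features = qweight.shape[1] * 32 // w_bit comes from. A small sketch of the shape bookkeeping only, with illustrative sizes; the group-wise qzeros/scales layout shown is the usual AWQ convention and should be treated as an assumption (no CUDA kernel is needed for this check):

```python
import torch

w_bit, group_size = 4, 128
in_features, out_features = 4096, 11008

# Eight 4-bit values are packed into each int32 column.
qweight = torch.zeros(in_features, out_features * w_bit // 32, dtype=torch.int32)
qzeros = torch.zeros(in_features // group_size, out_features * w_bit // 32, dtype=torch.int32)
scales = torch.zeros(in_features // group_size, out_features, dtype=torch.float16)

assert qweight.shape[1] * 32 // w_bit == out_features  # matches WQLinear.__init__
assert in_features % group_size == 0                   # the module's sanity check
print(qweight.shape, qzeros.shape, scales.shape)
```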
@@ -29,6 +29,12 @@ def _remove_duplicate_names(
         [name for name in shared if _is_complete(state_dict[name])]
     )
     if not complete_names:
+        if len(shared) == 1:
+            # Force contiguous
+            name = list(shared)[0]
+            state_dict[name] = state_dict[name].clone()
+            complete_names = {name}
+        else:
             raise RuntimeError(
                 f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue."
             )
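The new branch handles a tensor that shares storage only with itself (for example, a view that does not cover its whole storage): cloning it yields a standalone, contiguous tensor that safetensors can serialize. A minimal reproduction of the situation, assuming PyTorch >= 2.0 for untyped_storage():

```python
import torch

base = torch.arange(12.0).reshape(3, 4)
half = base[:2]                          # a view: shares base's storage
assert half.untyped_storage().data_ptr() == base.untyped_storage().data_ptr()

fixed = half.clone()                     # private, contiguous storage
assert fixed.untyped_storage().data_ptr() != base.untyped_storage().data_ptr()
assert torch.equal(fixed, half)          # same values, now independently serializable
```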
@@ -57,6 +57,7 @@ def attention(
     cu_seqlens,
     max_s,
     softmax_scale,
+    window_size_left=-1,
 ):
     if HAS_FLASH_ATTN_V2:
         return flash_attn_2_cuda.varlen_fwd(
@@ -72,11 +73,18 @@ def attention(
             softmax_scale,
             False,
             True,
+            window_size_left,
+            0,
             False,
             None,
         )
 
     if HAS_FLASH_ATTN:
+        if window_size_left != 0:
+            raise NotImplementedError(
+                "window_size_left is only available with flash attn v2"
+            )
+
         # Flash attention v1 requires q, k and v to have the same number of heads
         if k.shape[1] != q.shape[1]:
             # MQA expand
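window_size_left bounds how far back each query may attend: position i sees keys in [i - window_size_left, i], which is the sliding-window attention scheme this release adds, with -1 meaning unlimited. A mask-based sketch of the semantics only; the real kernel fuses this, and nothing here is the flash-attention API:

```python
import torch

def sliding_window_mask(seq_len: int, window_size_left: int) -> torch.Tensor:
    # True where query i may attend to key j: causal, and at most
    # `window_size_left` positions back (-1 disables the window).
    i = torch.arange(seq_len).unsqueeze(1)
    j = torch.arange(seq_len).unsqueeze(0)
    mask = j <= i
    if window_size_left >= 0:
        mask &= j >= i - window_size_left
    return mask

print(sliding_window_mask(5, 2).int())  # each row keeps at most 3 ones
```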
@@ -69,10 +69,11 @@ def create_exllama_buffers():
     TEMP_STATE, TEMP_DQ = temp_state, temp_dq
 
 
-class Ex4bitLinear:
+class Ex4bitLinear(torch.nn.Module):
     """Linear layer implementation with per-group 4-bit quantization of the weights"""
 
     def __init__(self, qweight, qzeros, scales, g_idx, bias, bits, groupsize):
+        super().__init__()
         global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE
         assert bits == 4
 
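Making Ex4bitLinear a torch.nn.Module (with the now-required super().__init__() call) lets it participate in the usual module machinery: submodule registration, .to()/device moves, and state-dict traversal. A minimal before/after sketch:

```python
import torch

class Plain:                      # old style: invisible to module traversal
    pass

class AsModule(torch.nn.Module):  # new style
    def __init__(self):
        super().__init__()        # required before registering params/buffers

class Wrapper(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = Plain()          # not tracked
        self.b = AsModule()       # tracked

print([name for name, _ in Wrapper().named_modules()])  # ['', 'b']
```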
@@ -578,7 +578,9 @@ def get_c4_new(nsamples, seed, seqlen, model_id, trust_remote_code):
     return trainloader, valenc
 
 
-def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model_id="", trust_remote_code=False):
+def get_loaders(
+    name, nsamples=128, seed=0, seqlen=2048, model_id="", trust_remote_code=False
+):
     if "wikitext2" in name:
         return get_wikitext2(nsamples, seed, seqlen, model_id, trust_remote_code)
     if "ptb" in name:
@@ -927,7 +929,7 @@ def quantize(
         seed=seed,
         model_id=model_id,
         seqlen=model.seqlen,
-        trust_remote_code=trust_remote_code
+        trust_remote_code=trust_remote_code,
     )
 
     tick = time.time()
Some files were not shown because too many files have changed in this diff