Merge pull request #134 from kdamaszk/rebase_tgi_2.0

Rebase with TGI v2.0
regisss 2024-05-06 09:28:16 +02:00 committed by GitHub
commit 81182bed76
272 changed files with 86587 additions and 4411 deletions


@ -5,14 +5,14 @@ body:
id: system-info
attributes:
label: System Info
description: |
Please share your system info with us (`text-generation-launcher --env` if installed locally).
The full command line used that causes issues:
OS version:
Rust version (if self-compiling, `cargo version`):
Model being used (`curl 127.0.0.1:8080/info | jq`):
If local model please explicit the kind of model and/or equivalents.
Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`):
Deployment specificities (Kubernetes, EKS, AKS, any particular deployments):
The current version being used:
@ -52,11 +52,11 @@ body:
placeholder: |
Steps to reproduce the behavior:
1.
2.
3.
- type: textarea
id: expected-behavior


@ -19,7 +19,7 @@ body:
label: Motivation
description: |
Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
- type: textarea
id: contribution


@ -6,15 +6,15 @@ on:
jobs:
update_docs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Install Launcher
id: install-launcher
run: cargo install --git https://github.com/${{ github.repository }} --branch ${{ github.head_ref }} text-generation-launcher
- name: Check launcher Docs are up-to-date
run: |
echo text-generation-launcher --help


@ -146,11 +146,50 @@ jobs:
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
integration-tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs:
- start-runner
- build-and-push-image # Wait for the docker image to be built
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
DOCKER_VOLUME: /cache
steps:
- uses: actions/checkout@v2
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Prepare disks
run: |
sudo mkfs -t ext4 /dev/nvme1n1
sudo mkdir ${{ env.DOCKER_VOLUME }}
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv integration-tests
build-and-push-image-rocm:
concurrency:
group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs: start-runner # required to start the main job when the runner is ready
needs:
- start-runner
- build-and-push-image # Wait for the main docker image to be built
- integration-tests # Wait for the main integration-tests
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
permissions:
contents: write
@ -235,43 +274,6 @@ jobs:
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
integration-tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs:
- start-runner
- build-and-push-image # Wait for the docker image to be built
- build-and-push-image-rocm
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
env:
DOCKER_VOLUME: /cache
steps:
- uses: actions/checkout@v2
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Tailscale
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
with:
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
- name: Prepare disks
run: |
sudo mkfs -t ext4 /dev/nvme1n1
sudo mkdir ${{ env.DOCKER_VOLUME }}
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv integration-tests
stop-runner:
name: Stop self-hosted EC2 runner
needs:


@ -16,4 +16,4 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }}
package: text-generation-inference
additional_args: --not_python_module


@ -1,12 +0,0 @@
name: Delete doc comment
on:
pull_request:
types: [ closed ]
jobs:
delete:
uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main
with:
pr_number: ${{ github.event.number }}

.github/workflows/stale.yml (new file)

@ -0,0 +1,14 @@
name: 'Close stale issues and PRs'
on:
schedule:
- cron: '30 1 * * *'
jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v8
with:
stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
days-before-stale: 30
days-before-close: 5


@ -33,11 +33,18 @@ jobs:
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: 1.71.0
# Released on: 28 December, 2023
# Branched from master on: 10 November, 2023
# https://releases.rs/docs/1.75.0/
toolchain: 1.75.0
override: true
components: rustfmt, clippy
- name: Install Protoc
uses: arduino/setup-protoc@v1
- name: Clean unused files
run: |
sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android
sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET
- name: Install sccache
run: |
curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache
@ -68,12 +75,11 @@ jobs:
pip install pytest
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
pytest -s -vv server/tests
- name: Run Rust fmt
- name: Pre-commit checks
run: |
cargo fmt --check
- name: Run Rust clippy
run: |
cargo clippy
pip install pre-commit
pre-commit install
pre-commit run --all-files
- name: Run Rust tests
run: |
cargo test


@ -13,4 +13,4 @@ jobs:
package_name: text-generation-inference
secrets:
hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}

.gitignore

@ -2,3 +2,12 @@
target
router/tokenizer.json
*__pycache__*
# ROCm auto-generated files
*.hip
server/exllamav2_kernels/exllamav2_kernels/hip/
server/exllama_kernels/exllama_kernels/hip/
server/exllama_kernels/exllama_kernels/hip_func/
*_hip.cuh
server/exllama_kernels/exllama_kernels/hip_buffers.cuh
server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp

.pre-commit-config.yaml (new file)

@ -0,0 +1,18 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: docs/source/basic_tutorials/launcher.md
- repo: https://github.com/psf/black
rev: 24.2.0
hooks:
- id: black
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0
hooks:
- id: fmt
- id: cargo-check
- id: clippy

Cargo.lock (generated file; diff suppressed because it is too large)


@ -9,7 +9,7 @@ members = [
resolver = "2"
[workspace.package]
version = "1.2.0"
version = "2.0.0"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"
@ -17,5 +17,7 @@ homepage = "https://github.com/huggingface/text-generation-inference"
[profile.release]
debug = 1
incremental = true
lto = "off"
lto = "fat"
opt-level = 3
codegen-units = 1
panic = "abort"


@ -31,7 +31,7 @@ COPY launcher launcher
RUN cargo build --release
# Text Generation Inference base image
FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest as base
FROM vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest as base
# Text Generation Inference base env
ENV HUGGINGFACE_HUB_CACHE=/data \
@ -58,8 +58,8 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements.txt && \
bash ./dill-0.3.7-patch.sh && \
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 && \
bash ./dill-0.3.8-patch.sh && \
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.0 && \
pip install . --no-cache-dir
# Install benchmarker
@ -72,5 +72,7 @@ COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/
# Final image
FROM base
ENTRYPOINT ["text-generation-launcher"]
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
ENTRYPOINT ["/tgi-entrypoint.sh"]
CMD ["--json-output"]


@ -1,5 +1,5 @@
# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
WORKDIR /usr/src
ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@ -36,7 +36,7 @@ COPY launcher launcher
RUN cargo build --release
# Text Generation Inference base image for RoCm
FROM rocm/dev-ubuntu-20.04:5.7 as base
FROM rocm/dev-ubuntu-22.04:5.7 as base
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
build-essential \
@ -75,8 +75,8 @@ RUN chmod +x ~/mambaforge.sh && \
mamba init && \
rm ~/mambaforge.sh
# Install PyTorch nightly (2.2.0.dev2023) compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6.
RUN pip install --pre torch==2.2.0.dev20231106 --index-url https://download.pytorch.org/whl/nightly/rocm5.7
# Install PyTorch 2.2 RC compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6.
RUN pip install torch --index-url https://download.pytorch.org/whl/test/rocm5.7/
FROM base AS kernel-builder
@ -104,6 +104,20 @@ WORKDIR /usr/src
COPY server/custom_kernels/ .
RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build
# Build exllama kernels
FROM kernel-builder as exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ .
RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
# Build exllama v2 kernels
FROM kernel-builder as exllamav2-kernels-builder
WORKDIR /usr/src
COPY server/exllamav2_kernels/ .
RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
FROM base as base-copy
# Text Generation Inference base env
@ -120,6 +134,12 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
# Copy build artifacts from custom kernels builder
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from exllama kernels builder
COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Copy build artifacts from exllamav2 kernels builder
COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
# Install flash-attention dependencies
RUN pip install einops --no-cache-dir
@ -130,7 +150,7 @@ COPY server/Makefile server/Makefile
RUN cd server && \
make gen-server && \
pip install -r requirements_rocm.txt && \
pip install ".[accelerate, peft]" --no-cache-dir
pip install ".[accelerate, peft, outlines]" --no-cache-dir
# Install benchmarker
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

LICENSE

@ -1,181 +1,201 @@
Hugging Face Optimized Inference License 1.0 (HFOILv1.0)
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
This License Agreement governs the use of the Software and its Modifications. It is a
binding agreement between the Licensor and You.
1. Definitions.
This License Agreement shall be referred to as Hugging Face Optimized Inference License
1.0 or HFOILv1.0. We may publish revised versions of this License Agreement from time to
time. Each version will be given a distinguished number.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
By downloading, accessing, modifying, distributing or otherwise using the Software, You
consent to all of the terms and conditions below. So, if You do not agree with those,
please do not download, access, modify, distribute, or use the Software.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
1. PERMISSIONS
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
You may use, modify and distribute the Software pursuant to the following terms and
conditions:
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
Copyright License. Subject to the terms and conditions of this License Agreement and where
and as applicable, each Contributor hereby grants You a perpetual, worldwide,
non-exclusive, royalty-free, copyright license to reproduce, prepare, publicly display,
publicly perform, sublicense under the terms herein, and distribute the Software and
Modifications of the Software.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
Patent License. Subject to the terms and conditions of this License Agreement and where
and as applicable, each Contributor hereby grants You a perpetual, worldwide,
non-exclusive, royalty-free patent license to make, have made, Use, offer to sell, sell,
import, and otherwise transfer the Software, where such license applies only to those
patent claims licensable by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s) with the Software to
which such Contribution(s) was submitted. If You institute patent litigation against any
entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Software
or a Contribution incorporated within the Software constitutes direct or contributory
patent infringement, then any rights granted to You under this License Agreement for the
Software shall terminate as of the date such litigation is filed.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
No other rights. All rights not expressly granted herein are retained.
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
2. RESTRICTIONS
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
You may not distribute the Software as a hosted or managed, and paid service, where the
service grants users access to any substantial set of the features or functionality of the
Software. If you wish to do so, You will need to be granted additional rights from the
Licensor which will be subject to a separate mutually agreed agreement.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
You may not sublicense the Software under any other terms than those listed in this
License.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
3. OBLIGATIONS
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
When You modify the Software, You agree to: - attach a notice stating the Modifications of
the Software You made; and - attach a notice stating that the Modifications of the
Software are released under this License Agreement.
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
When You distribute the Software or Modifications of the Software, You agree to: - give
any recipients of the Software a copy of this License Agreement; - retain all Explanatory
Documentation; and if sharing the Modifications of the Software, add Explanatory
Documentation documenting the changes made to create the Modifications of the Software; -
retain all copyright, patent, trademark and attribution notices.
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
4. MISCELLANEOUS
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
Termination. Licensor reserves the right to restrict Use of the Software in violation of
this License Agreement, upon which Your licenses will automatically terminate.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
Contributions. Unless You explicitly state otherwise, any Contribution intentionally
submitted for inclusion in the Software by You to the Licensor shall be under the terms
and conditions of this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify the terms of any
separate license agreement you may have executed with Licensor regarding such
Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
Trademarks and related. Nothing in this License Agreement permits You (i) to make Use of
Licensors trademarks, trade names, or logos, (ii) otherwise suggest endorsement by
Licensor, or (iii) misrepresent the relationship between the parties; and any rights not
expressly granted herein are reserved by the Licensors.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
Output You generate. Licensor claims no rights in the Output. You agree not to contravene
any provision as stated in the License Agreement with your Use of the Output.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
Disclaimer of Warranty. Except as expressly provided otherwise herein, and to the fullest
extent permitted by law, Licensor provides the Software (and each Contributor provides its
Contributions) AS IS, and Licensor disclaims all warranties or guarantees of any kind,
express or implied, whether arising under any law or from any usage in trade, or otherwise
including but not limited to the implied warranties of merchantability, non-infringement,
quiet enjoyment, fitness for a particular purpose, or otherwise. You are solely
responsible for determining the appropriateness of the Software and Modifications of the
Software for your purposes (including your use or distribution of the Software and
Modifications of the Software), and assume any risks associated with Your exercise of
permissions under this License Agreement.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
Limitation of Liability. In no event and under no legal theory, whether in tort (including
negligence), contract, or otherwise, unless required by applicable law (such as deliberate
and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to
You for damages, including any direct, indirect, special, incidental, or consequential
damages of any character arising as a result of this License Agreement or out of the Use
or inability to Use the Software (including but not limited to damages for loss of
goodwill, work stoppage, computer failure or malfunction, model failure or malfunction, or
any and all other commercial damages or losses), even if such Contributor has been advised
of the possibility of such damages.
END OF TERMS AND CONDITIONS
Accepting Warranty or Additional Liability. While sharing the Software or Modifications of
the Software thereof, You may choose to offer and charge a fee for, acceptance of support,
warranty, indemnity, or other liability obligations and/or rights consistent with this
License Agreement. However, in accepting such obligations, You may act only on Your own
behalf and on Your sole responsibility, not on behalf of Licensor or any other
Contributor, and you hereby agree to indemnify, defend, and hold Licensor and each other
Contributor (and their successors or assigns) harmless for any liability incurred by, or
claims asserted against, such Licensor or Contributor (and their successors or assigns) by
reason of your accepting any such warranty or additional liability.
APPENDIX: How to apply the Apache License to your work.
Severability. This License Agreement is a license of copyright and patent rights and an
agreement in contract between You and the Licensor. If any provision of this License
Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall
be unaffected thereby and remain valid as if such provision had not been set forth herein.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2022 Hugging Face
5. DEFINITIONS
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
“Contribution” refers to any work of authorship, including the original version of the
Software and any Modifications of the Software that is intentionally submitted to Licensor
for inclusion in the Software by the copyright owner or by an individual or entity
authorized to submit on behalf of the copyright owner. For the purposes of this
definition, “submitted” means any form of electronic, verbal, or written communication
sent to the Licensor or its representatives, including but not limited to communication on
electronic mailing lists, source code control systems, and issue tracking systems that are
managed by, or on behalf of, the Licensor for the purpose of discussing and improving the
Software, but excluding communication that is conspicuously marked or otherwise designated
in writing by the copyright owner as “Not a Contribution.”
http://www.apache.org/licenses/LICENSE-2.0
“Contributor” refers to Licensor and any individual or entity on behalf of whom a
Contribution has been received by Licensor and subsequently incorporated within the
Software.
“Data” refers to a collection of information extracted from the dataset used with the
Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not
licensed under this License Agreement.
“Explanatory Documentation” refers to any documentation or related information including
but not limited to model cards or data cards dedicated to inform the public about the
characteristics of the Software. Explanatory documentation is not licensed under this
License.
"License Agreement" refers to these terms and conditions.
“Licensor” refers to the rights owners or entity authorized by the rights owners that are
granting the terms and conditions of this License Agreement.
“Model” refers to machine-learning based assemblies (including checkpoints), consisting of
learnt weights and parameters (including optimizer states), corresponding to a model
architecture as embodied in Software source code. Source code is not licensed under this
License Agreement.
“Modifications of the Software” refers to all changes to the Software, including without
limitation derivative works of the Software.
“Output” refers to the results of operating the Software.
“Share” refers to any transmission, reproduction, publication or other sharing of the
Software or Modifications of the Software to a third party, including providing the
Softwaire as a hosted service made available by electronic or other remote means,
including - but not limited to - API-based or web access.
“Software” refers to the software and Model (or parts of either) that Licensor makes
available under this License Agreement.
“Third Parties” refers to individuals or legal entities that are not under common control
with Licensor or You.
“Use” refers to anything You or your representatives do with the Software, including but
not limited to generating any Output, fine tuning, updating, running, training, evaluating
and/or reparametrizing the Model.
"You" (or "Your") refers to an individual or Legal Entity exercising permissions granted
by this License Agreement and/or making Use of the Software for whichever purpose and in
any field of Use.
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@ -44,7 +44,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
model=meta-llama/Llama-2-7b-hf
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model
docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --max-input-length 1024 --max-total-tokens 2048
```
> For gated models such as [LLama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to pass `-e HUGGING_FACE_HUB_TOKEN=<token>` to the `docker run` command above with a valid Hugging Face Hub read token.
@ -53,7 +53,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
model=meta-llama/Llama-2-70b-hf
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8
docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8 --max-input-length 1024 --max-total-tokens 2048
```
3. You can then send a simple request:
```bash

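The diff hunk above is cut off before the request example itself. As a rough illustration only (not taken from this commit), a request of the kind referred to in step 3 can be sent to the server's `/generate` endpoint; this sketch assumes the server started by the `docker run` commands above is listening on `127.0.0.1:8080`:

```python
# Illustrative sketch, not part of this commit: send a simple generation request
# to a running TGI server (assumed to be listening on 127.0.0.1:8080).
import requests

response = requests.post(
    "http://127.0.0.1:8080/generate",
    json={
        "inputs": "What is Deep Learning?",
        "parameters": {"max_new_tokens": 32},
    },
    headers={"Content-Type": "application/json"},
    timeout=60,
)
response.raise_for_status()
print(response.json()["generated_text"])
```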
assets/architecture.png (binary image, new file, 930 KiB; the previous architecture image, 334 KiB, is removed)


@ -29,4 +29,3 @@ tui = {package = "ratatui", version = "0.23", default-features = false, features
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
hf-hub = "0.3.1"


@ -6,12 +6,12 @@
</div>
A lightweight benchmarking tool based inspired by [oha](https://github.com/hatoo/oha)
and powered by [tui](https://github.com/tui-rs-revival/ratatui).
## Install
```shell
make install-benchmark
```
@ -27,4 +27,4 @@ Then run the benchmarking tool:
```shell
text-generation-benchmark --tokenizer-name bigscience/bloom-560m
```


@ -444,7 +444,7 @@ fn progress_gauge(title: &str, label: String, progress: f64, color: Color) -> Ga
}
/// Throughput paragraph
fn throughput_paragraph<'a>(throughput: &Vec<f64>, name: &'static str) -> Paragraph<'a> {
fn throughput_paragraph<'a>(throughput: &[f64], name: &'static str) -> Paragraph<'a> {
// Throughput average/high/low texts
let throughput_texts = statis_spans(throughput, "tokens/secs");
@ -457,7 +457,7 @@ fn throughput_paragraph<'a>(throughput: &Vec<f64>, name: &'static str) -> Paragr
}
/// Latency paragraph
fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragraph<'a> {
fn latency_paragraph<'a>(latency: &mut [f64], name: &'static str) -> Paragraph<'a> {
// Latency average/high/low texts
let mut latency_texts = statis_spans(latency, "ms");
@ -466,7 +466,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
let latency_percentiles = crate::utils::percentiles(latency, &[50, 90, 99]);
// Latency p50/p90/p99 texts
let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
let colors = [Color::LightGreen, Color::LightYellow, Color::LightRed];
for (i, (name, value)) in latency_percentiles.iter().enumerate() {
let span = Line::from(vec![Span::styled(
format!("{name}: {value:.2} ms"),
@ -483,7 +483,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
}
/// Average/High/Low spans
fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Line<'a>> {
fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
vec![
Line::from(vec![Span::styled(
format!(
@ -543,7 +543,7 @@ fn latency_histogram<'a>(
/// Latency/Throughput chart
fn latency_throughput_chart<'a>(
latency_throughput: &'a Vec<(f64, f64)>,
latency_throughput: &'a [(f64, f64)],
batch_sizes: &'a [u32],
zoom: bool,
name: &'static str,


@ -163,7 +163,7 @@ async fn prefill(
// Run prefill
let start_time = Instant::now();
let (_, decode_batch) = client.prefill(batch.clone()).await?;
let (_, decode_batch, _) = client.prefill(batch.clone()).await?;
// Get latency
let latency = start_time.elapsed();


@ -8,7 +8,7 @@ use crate::app::App;
use crate::event::Event;
use crossterm::ExecutableCommand;
use std::io;
use text_generation_client::{NextTokenChooserParameters, ShardedClient};
use text_generation_client::{GrammarType, NextTokenChooserParameters, ShardedClient};
use tokenizers::Tokenizer;
use tokio::sync::{broadcast, mpsc};
use tui::backend::CrosstermBackend;
@ -30,6 +30,7 @@ pub async fn run(
top_p: Option<f32>,
typical_p: Option<f32>,
repetition_penalty: Option<f32>,
frequency_penalty: Option<f32>,
watermark: bool,
do_sample: bool,
client: ShardedClient,
@ -42,7 +43,10 @@ pub async fn run(
do_sample,
seed: 0,
repetition_penalty: repetition_penalty.unwrap_or(1.0),
frequency_penalty: frequency_penalty.unwrap_or(0.0),
watermark,
grammar: String::new(),
grammar_type: GrammarType::None as i32,
};
// Initialize terminal properties
@ -140,6 +144,7 @@ pub async fn run(
top_p,
typical_p,
repetition_penalty,
frequency_penalty,
watermark,
do_sample,
);


@ -84,6 +84,11 @@ struct Args {
#[clap(long, env)]
repetition_penalty: Option<f32>,
/// Generation parameter in case you want to specifically test/debug particular
/// decoding strategies, for full doc refer to the `text-generation-server`
#[clap(long, env)]
frequency_penalty: Option<f32>,
/// Generation parameter in case you want to specifically test/debug particular
/// decoding strategies, for full doc refer to the `text-generation-server`
#[clap(long, env)]
@ -119,6 +124,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
top_p,
typical_p,
repetition_penalty,
frequency_penalty,
watermark,
do_sample,
master_shard_uds_path,
@ -187,6 +193,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
top_p,
typical_p,
repetition_penalty,
frequency_penalty,
watermark,
do_sample,
sharded_client,


@ -15,6 +15,7 @@ pub(crate) fn parameters_table(
top_p: Option<f32>,
typical_p: Option<f32>,
repetition_penalty: Option<f32>,
frequency_penalty: Option<f32>,
watermark: bool,
do_sample: bool,
) -> Table {
@ -33,6 +34,7 @@ pub(crate) fn parameters_table(
builder.push_record(["Top P", &format!("{top_p:?}")]);
builder.push_record(["Typical P", &format!("{typical_p:?}")]);
builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]);
builder.push_record(["Watermark", &watermark.to_string()]);
builder.push_record(["Do Sample", &do_sample.to_string()]);
@ -149,7 +151,7 @@ fn add_throuhgputs(
}
}
fn avg_min_max(data: &Vec<f64>) -> (f64, f64, f64) {
fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
let average = data.iter().sum::<f64>() / data.len() as f64;
let min = data
.iter()
@ -162,7 +164,7 @@ fn avg_min_max(data: &Vec<f64>) -> (f64, f64, f64) {
(average, *min, *max)
}
fn px(data: &Vec<f64>, p: u32) -> f64 {
fn px(data: &[f64], p: u32) -> f64 {
let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
*data.get(i).unwrap_or(&std::f64::NAN)
}


@ -155,4 +155,4 @@ dmypy.json
cython_debug/
transformers
safetensors


@ -3,4 +3,4 @@ unit-tests:
install:
pip install pip --upgrade
pip install -e .


@ -107,7 +107,19 @@ print(text)
### Types
```python
# Request Parameters
# enum for grammar type
class GrammarType(Enum):
Json = "json"
Regex = "regex"
# Grammar type and value
class Grammar:
# Grammar type
type: GrammarType
# Grammar value
value: Union[str, dict]
class Parameters:
# Activate logits sampling
do_sample: bool
@ -116,6 +128,10 @@ class Parameters:
# The parameter for repetition penalty. 1.0 means no penalty.
# See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
repetition_penalty: Optional[float]
# The parameter for frequency penalty. 1.0 means no penalty
# Penalize new tokens based on their existing frequency in the text so far,
# decreasing the model's likelihood to repeat the same line verbatim.
frequency_penalty: Optional[float]
# Whether to prepend the prompt to the generated text
return_full_text: bool
# Stop generating tokens if a member of `stop_sequences` is generated
@ -138,10 +154,22 @@ class Parameters:
best_of: Optional[int]
# Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
watermark: bool
# Get generation details
details: bool
# Get decoder input token logprobs and ids
decoder_input_details: bool
# Return the N most likely tokens at each step
top_n_tokens: Optional[int]
# grammar to use for generation
grammar: Optional[Grammar]
class Request:
# Prompt
inputs: str
# Generation parameters
parameters: Optional[Parameters]
# Whether to stream output tokens
stream: bool
# Decoder input tokens
class InputToken:
@ -161,7 +189,7 @@ class Token:
# Token text
text: str
# Logprob
logprob: float
logprob: Optional[float]
# Is the token a special token
# Can be used to ignore tokens when concatenating
special: bool
@ -192,7 +220,7 @@ class BestOfSequence:
# Generated tokens
tokens: List[Token]
# Most likely tokens
top_tokens: Optional[List[List[Token]]]
# `generate` details
@ -236,7 +264,7 @@ class StreamResponse:
# Generated token
token: Token
# Most likely tokens
top_tokens: Optional[List[Token]]
# Complete generated text
# Only available when the generation is finished
generated_text: Optional[str]
@ -248,4 +276,4 @@ class StreamResponse:
class DeployedModel:
model_id: str
sha: str
```
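As a usage note (not part of this commit), the newly documented `frequency_penalty` and `grammar` parameters can be exercised from the Python client roughly as sketched below; the import paths and keyword names are assumptions based on the type definitions above and may differ between client versions:

```python
# Illustrative sketch, not from this commit: using the grammar and
# frequency_penalty parameters documented above. Import paths and keyword
# names are assumed from the type definitions and may vary by client version.
from text_generation import Client
from text_generation.types import Grammar, GrammarType

client = Client("http://127.0.0.1:8080")

response = client.generate(
    "Return a JSON object describing a person named Ada.",
    max_new_tokens=64,
    # Penalize tokens according to how often they already appear in the output.
    frequency_penalty=0.5,
    # Constrain generation to JSON matching a (hypothetical) schema.
    grammar=Grammar(
        type=GrammarType.Json,
        value={
            "type": "object",
            "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
            "required": ["name"],
        },
    ),
)
print(response.generated_text)
```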


@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "aiohttp"
@ -707,18 +707,19 @@ files = [
[[package]]
name = "pydantic"
version = "2.4.2"
version = "2.5.3"
description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.7"
files = [
{file = "pydantic-2.4.2-py3-none-any.whl", hash = "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1"},
{file = "pydantic-2.4.2.tar.gz", hash = "sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7"},
{file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"},
{file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"},
]
[package.dependencies]
annotated-types = ">=0.4.0"
pydantic-core = "2.10.1"
importlib-metadata = {version = "*", markers = "python_version == \"3.7\""}
pydantic-core = "2.14.6"
typing-extensions = ">=4.6.1"
[package.extras]
@ -726,117 +727,116 @@ email = ["email-validator (>=2.0.0)"]
[[package]]
name = "pydantic-core"
version = "2.10.1"
version = "2.14.6"
description = ""
optional = false
python-versions = ">=3.7"
files = [
{file = "pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63"},
{file = "pydantic_core-2.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e"},
{file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e"},
{file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e"},
{file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6"},
{file = "pydantic_core-2.10.1-cp310-none-win32.whl", hash = "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b"},
{file = "pydantic_core-2.10.1-cp310-none-win_amd64.whl", hash = "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0"},
{file = "pydantic_core-2.10.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea"},
{file = "pydantic_core-2.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8"},
{file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4"},
{file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607"},
{file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f"},
{file = "pydantic_core-2.10.1-cp311-none-win32.whl", hash = "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6"},
{file = "pydantic_core-2.10.1-cp311-none-win_amd64.whl", hash = "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27"},
{file = "pydantic_core-2.10.1-cp311-none-win_arm64.whl", hash = "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325"},
{file = "pydantic_core-2.10.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921"},
{file = "pydantic_core-2.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901"},
{file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d"},
{file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f"},
{file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c"},
{file = "pydantic_core-2.10.1-cp312-none-win32.whl", hash = "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f"},
{file = "pydantic_core-2.10.1-cp312-none-win_amd64.whl", hash = "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430"},
{file = "pydantic_core-2.10.1-cp312-none-win_arm64.whl", hash = "sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94"},
{file = "pydantic_core-2.10.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede"},
{file = "pydantic_core-2.10.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891"},
{file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221"},
{file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15"},
{file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f"},
{file = "pydantic_core-2.10.1-cp37-none-win32.whl", hash = "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c"},
{file = "pydantic_core-2.10.1-cp37-none-win_amd64.whl", hash = "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e"},
{file = "pydantic_core-2.10.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc"},
{file = "pydantic_core-2.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302"},
{file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e"},
{file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561"},
{file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de"},
{file = "pydantic_core-2.10.1-cp38-none-win32.whl", hash = "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee"},
{file = "pydantic_core-2.10.1-cp38-none-win_amd64.whl", hash = "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e"},
{file = "pydantic_core-2.10.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970"},
{file = "pydantic_core-2.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a"},
{file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429"},
{file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7"},
{file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595"},
{file = "pydantic_core-2.10.1-cp39-none-win32.whl", hash = "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a"},
{file = "pydantic_core-2.10.1-cp39-none-win_amd64.whl", hash = "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357"},
{file = "pydantic_core-2.10.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2"},
{file = "pydantic_core-2.10.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132"},
{file = "pydantic_core-2.10.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7"},
{file = "pydantic_core-2.10.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776"},
{file = "pydantic_core-2.10.1.tar.gz", hash = "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82"},
{file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"},
{file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"},
{file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"},
{file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"},
{file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"},
{file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"},
{file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"},
{file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"},
{file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"},
{file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"},
{file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"},
{file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"},
{file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"},
{file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"},
{file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"},
{file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"},
{file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"},
{file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"},
{file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"},
{file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"},
{file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"},
{file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"},
{file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"},
{file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"},
{file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"},
{file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"},
{file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"},
{file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"},
{file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"},
{file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"},
{file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"},
{file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"},
{file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"},
{file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"},
{file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"},
{file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"},
{file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"},
{file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"},
{file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"},
{file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"},
{file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"},
{file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"},
{file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"},
{file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"},
{file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"},
{file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"},
{file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"},
{file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"},
{file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"},
]
[package.dependencies]
@ -928,6 +928,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation"
version = "0.6.1"
version = "0.7.0"
description = "Hugging Face Text Generation Python Client"
license = "Apache-2.0"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
@ -12,7 +12,7 @@ repository = "https://github.com/huggingface/text-generation-inference"
[tool.poetry.dependencies]
python = "^3.7"
pydantic = "> 1.10, < 3"
pydantic = "> 2, < 3"
aiohttp = "^3.8"
huggingface-hub = ">= 0.12, < 1.0"
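Since the client now requires pydantic v2, a quick, hypothetical pre-upgrade check in a consuming application might look like this (the assertion message is illustrative):

```python
# Hypothetical sanity check that the installed pydantic satisfies the new "> 2, < 3" bound.
import pydantic

major = int(pydantic.VERSION.split(".")[0])
assert major == 2, f"text-generation 0.7.0 expects pydantic v2, found {pydantic.VERSION}"
```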

View File

@ -3,13 +3,19 @@ import requests
from aiohttp import ClientSession, ClientTimeout
from pydantic import ValidationError
from typing import Dict, Optional, List, AsyncIterator, Iterator
from typing import Dict, Optional, List, AsyncIterator, Iterator, Union
from text_generation.types import (
StreamResponse,
Response,
Request,
Parameters,
Grammar,
ChatRequest,
ChatCompletionChunk,
ChatComplete,
Message,
Tool,
)
from text_generation.errors import parse_error
@ -58,6 +64,120 @@ class Client:
self.cookies = cookies
self.timeout = timeout
def chat(
self,
messages: List[Message],
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
logit_bias: Optional[List[float]] = None,
logprobs: Optional[bool] = None,
top_logprobs: Optional[int] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[float] = None,
stream: bool = False,
seed: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
tools: Optional[List[Tool]] = None,
tool_choice: Optional[str] = None,
):
"""
Given a list of messages, generate a response
Args:
messages (`List[Message]`):
List of messages
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
logit_bias (`List[float]`):
Adjust the likelihood of specified tokens
logprobs (`bool`):
Include log probabilities in the response
top_logprobs (`int`):
Include the `n` most likely tokens at each step
max_tokens (`int`):
Maximum number of generated tokens
n (`int`):
Generate `n` completions
presence_penalty (`float`):
The parameter for presence penalty. 0.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
stream (`bool`):
Stream the response
seed (`int`):
Random sampling seed
temperature (`float`):
The value used to modulate the logits distribution.
top_p (`float`):
If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
higher are kept for generation
tools (`List[Tool]`):
List of tools to use
tool_choice (`str`):
The tool to use
"""
request = ChatRequest(
model="tgi",
messages=messages,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
logit_bias=logit_bias,
logprobs=logprobs,
top_logprobs=top_logprobs,
max_tokens=max_tokens,
n=n,
presence_penalty=presence_penalty,
stream=stream,
seed=seed,
temperature=temperature,
top_p=top_p,
tools=tools,
tool_choice=tool_choice,
)
if not stream:
resp = requests.post(
f"{self.base_url}/v1/chat/completions",
json=request.dict(),
headers=self.headers,
cookies=self.cookies,
timeout=self.timeout,
)
payload = resp.json()
if resp.status_code != 200:
raise parse_error(resp.status_code, payload)
return ChatComplete(**payload)
else:
return self._chat_stream_response(request)
def _chat_stream_response(self, request):
resp = requests.post(
f"{self.base_url}/v1/chat/completions",
json=request.dict(),
headers=self.headers,
cookies=self.cookies,
timeout=self.timeout,
stream=True,
)
# iterate over the server-sent event stream
for byte_payload in resp.iter_lines():
if byte_payload == b"\n":
continue
payload = byte_payload.decode("utf-8")
if payload.startswith("data:"):
json_payload = json.loads(payload.lstrip("data:").rstrip("\n"))
try:
response = ChatCompletionChunk(**json_payload)
yield response
except ValidationError:
raise parse_error(resp.status_code, json_payload)
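A short usage sketch of the new synchronous `chat` method; the endpoint URL and prompts below are illustrative assumptions, not part of the change:

```python
from text_generation import Client
from text_generation.types import Message

# Assumes a TGI server is reachable locally on port 8080.
client = Client("http://127.0.0.1:8080")

# Non-streaming call: returns a ChatComplete object.
complete = client.chat(
    messages=[Message(role="user", content="What is deep learning?")],
    max_tokens=64,
)
print(complete.choices[0].message.content)

# Streaming call: returns an iterator of ChatCompletionChunk objects.
for chunk in client.chat(
    messages=[Message(role="user", content="Tell me a joke.")],
    max_tokens=64,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")
```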
def generate(
self,
prompt: str,
@ -65,6 +185,7 @@ class Client:
max_new_tokens: int = 20,
best_of: Optional[int] = None,
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
return_full_text: bool = False,
seed: Optional[int] = None,
stop_sequences: Optional[List[str]] = None,
@ -76,6 +197,7 @@ class Client:
watermark: bool = False,
decoder_input_details: bool = False,
top_n_tokens: Optional[int] = None,
grammar: Optional[Grammar] = None,
) -> Response:
"""
Given a prompt, generate the following text
@ -92,6 +214,10 @@ class Client:
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
return_full_text (`bool`):
Whether to prepend the prompt to the generated text
seed (`int`):
@ -116,6 +242,9 @@ class Client:
Return the decoder input token logprobs and ids
top_n_tokens (`int`):
Return the `n` most likely tokens at each step
grammar (`Grammar`):
Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
of the text to match a regular expression or JSON schema.
Returns:
Response: generated response
@ -127,6 +256,7 @@ class Client:
do_sample=do_sample,
max_new_tokens=max_new_tokens,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
return_full_text=return_full_text,
seed=seed,
stop=stop_sequences if stop_sequences is not None else [],
@ -138,6 +268,7 @@ class Client:
watermark=watermark,
decoder_input_details=decoder_input_details,
top_n_tokens=top_n_tokens,
grammar=grammar,
)
request = Request(inputs=prompt, stream=False, parameters=parameters)
@ -159,6 +290,7 @@ class Client:
do_sample: bool = False,
max_new_tokens: int = 20,
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
return_full_text: bool = False,
seed: Optional[int] = None,
stop_sequences: Optional[List[str]] = None,
@ -169,6 +301,7 @@ class Client:
typical_p: Optional[float] = None,
watermark: bool = False,
top_n_tokens: Optional[int] = None,
grammar: Optional[Grammar] = None,
) -> Iterator[StreamResponse]:
"""
Given a prompt, generate the following stream of tokens
@ -183,6 +316,10 @@ class Client:
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
return_full_text (`bool`):
Whether to prepend the prompt to the generated text
seed (`int`):
@ -205,6 +342,9 @@ class Client:
Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
top_n_tokens (`int`):
Return the `n` most likely tokens at each step
grammar (`Grammar`):
Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
of the text to match a regular expression or JSON schema.
Returns:
Iterator[StreamResponse]: stream of generated tokens
@ -217,6 +357,7 @@ class Client:
do_sample=do_sample,
max_new_tokens=max_new_tokens,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
return_full_text=return_full_text,
seed=seed,
stop=stop_sequences if stop_sequences is not None else [],
@ -227,6 +368,7 @@ class Client:
typical_p=typical_p,
watermark=watermark,
top_n_tokens=top_n_tokens,
grammar=grammar,
)
request = Request(inputs=prompt, stream=True, parameters=parameters)
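A sketch of the new `grammar` parameter on `generate`, assuming the server was started with grammar support enabled; the prompt, regex, and URL are illustrative:

```python
from text_generation import Client
from text_generation.types import Grammar

client = Client("http://127.0.0.1:8080")

# Constrain the output to match a simple regular expression.
response = client.generate(
    "What is the capital of France? Answer with a single word:",
    max_new_tokens=10,
    grammar=Grammar(type="regex", value="[A-Z][a-z]+"),
)
print(response.generated_text)
```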
@ -306,7 +448,120 @@ class AsyncClient:
self.base_url = base_url
self.headers = headers
self.cookies = cookies
self.timeout = ClientTimeout(timeout * 60)
self.timeout = ClientTimeout(timeout)
async def chat(
self,
messages: List[Message],
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
logit_bias: Optional[List[float]] = None,
logprobs: Optional[bool] = None,
top_logprobs: Optional[int] = None,
max_tokens: Optional[int] = None,
n: Optional[int] = None,
presence_penalty: Optional[float] = None,
stream: bool = False,
seed: Optional[int] = None,
temperature: Optional[float] = None,
top_p: Optional[float] = None,
tools: Optional[List[Tool]] = None,
tool_choice: Optional[str] = None,
) -> Union[ChatComplete, AsyncIterator[ChatCompletionChunk]]:
"""
Given a list of messages, generate a response asynchronously
Args:
messages (`List[Message]`):
List of messages
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
logit_bias (`List[float]`):
Adjust the likelihood of specified tokens
logprobs (`bool`):
Include log probabilities in the response
top_logprobs (`int`):
Include the `n` most likely tokens at each step
max_tokens (`int`):
Maximum number of generated tokens
n (`int`):
Generate `n` completions
presence_penalty (`float`):
The parameter for presence penalty. 0.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
stream (`bool`):
Stream the response
seed (`int`):
Random sampling seed
temperature (`float`):
The value used to modulate the logits distribution.
top_p (`float`):
If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
higher are kept for generation
tools (`List[Tool]`):
List of tools to use
tool_choice (`str`):
The tool to use
"""
request = ChatRequest(
model="tgi",
messages=messages,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
logit_bias=logit_bias,
logprobs=logprobs,
top_logprobs=top_logprobs,
max_tokens=max_tokens,
n=n,
presence_penalty=presence_penalty,
stream=stream,
seed=seed,
temperature=temperature,
top_p=top_p,
tools=tools,
tool_choice=tool_choice,
)
if not stream:
return await self._chat_single_response(request)
else:
return self._chat_stream_response(request)
async def _chat_single_response(self, request):
async with ClientSession(
headers=self.headers, cookies=self.cookies, timeout=self.timeout
) as session:
async with session.post(
f"{self.base_url}/v1/chat/completions", json=request.dict()
) as resp:
payload = await resp.json()
if resp.status != 200:
raise parse_error(resp.status, payload)
return ChatComplete(**payload)
async def _chat_stream_response(self, request):
async with ClientSession(
headers=self.headers, cookies=self.cookies, timeout=self.timeout
) as session:
async with session.post(
f"{self.base_url}/v1/chat/completions", json=request.dict()
) as resp:
async for byte_payload in resp.content:
if byte_payload == b"\n":
continue
payload = byte_payload.decode("utf-8")
if payload.startswith("data:"):
json_payload = json.loads(payload.lstrip("data:").rstrip("\n"))
try:
response = ChatCompletionChunk(**json_payload)
yield response
except ValidationError:
raise parse_error(resp.status, json_payload)
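The asynchronous counterpart in both modes (URL and prompts are illustrative assumptions):

```python
import asyncio

from text_generation import AsyncClient
from text_generation.types import Message

async def main():
    client = AsyncClient("http://127.0.0.1:8080")

    # Non-streaming: awaiting chat() yields a ChatComplete.
    complete = await client.chat(
        messages=[Message(role="user", content="What is deep learning?")],
        max_tokens=64,
    )
    print(complete.choices[0].message.content)

    # Streaming: chat(stream=True) resolves to an async iterator of ChatCompletionChunk.
    chunks = await client.chat(
        messages=[Message(role="user", content="Tell me a joke.")],
        max_tokens=64,
        stream=True,
    )
    async for chunk in chunks:
        print(chunk.choices[0].delta.content or "", end="")

asyncio.run(main())
```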
async def generate(
self,
@ -315,6 +570,7 @@ class AsyncClient:
max_new_tokens: int = 20,
best_of: Optional[int] = None,
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
return_full_text: bool = False,
seed: Optional[int] = None,
stop_sequences: Optional[List[str]] = None,
@ -326,6 +582,7 @@ class AsyncClient:
watermark: bool = False,
decoder_input_details: bool = False,
top_n_tokens: Optional[int] = None,
grammar: Optional[Grammar] = None,
) -> Response:
"""
Given a prompt, generate the following text asynchronously
@ -342,6 +599,10 @@ class AsyncClient:
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
return_full_text (`bool`):
Whether to prepend the prompt to the generated text
seed (`int`):
@ -366,10 +627,14 @@ class AsyncClient:
Return the decoder input token logprobs and ids
top_n_tokens (`int`):
Return the `n` most likely tokens at each step
grammar (`Grammar`):
Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
of the text to match a regular expression or JSON schema.
Returns:
Response: generated response
"""
# Validate parameters
parameters = Parameters(
best_of=best_of,
@ -378,6 +643,7 @@ class AsyncClient:
do_sample=do_sample,
max_new_tokens=max_new_tokens,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
return_full_text=return_full_text,
seed=seed,
stop=stop_sequences if stop_sequences is not None else [],
@ -388,6 +654,7 @@ class AsyncClient:
typical_p=typical_p,
watermark=watermark,
top_n_tokens=top_n_tokens,
grammar=grammar,
)
request = Request(inputs=prompt, stream=False, parameters=parameters)
@ -407,6 +674,7 @@ class AsyncClient:
do_sample: bool = False,
max_new_tokens: int = 20,
repetition_penalty: Optional[float] = None,
frequency_penalty: Optional[float] = None,
return_full_text: bool = False,
seed: Optional[int] = None,
stop_sequences: Optional[List[str]] = None,
@ -417,6 +685,7 @@ class AsyncClient:
typical_p: Optional[float] = None,
watermark: bool = False,
top_n_tokens: Optional[int] = None,
grammar: Optional[Grammar] = None,
) -> AsyncIterator[StreamResponse]:
"""
Given a prompt, generate the following stream of tokens asynchronously
@ -431,6 +700,10 @@ class AsyncClient:
repetition_penalty (`float`):
The parameter for repetition penalty. 1.0 means no penalty. See [this
paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
frequency_penalty (`float`):
The parameter for frequency penalty. 0.0 means no penalty.
Penalizes new tokens based on their existing frequency in the text so far,
decreasing the model's likelihood to repeat the same line verbatim.
return_full_text (`bool`):
Whether to prepend the prompt to the generated text
seed (`int`):
@ -453,6 +726,9 @@ class AsyncClient:
Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
top_n_tokens (`int`):
Return the `n` most likely tokens at each step
grammar (`Grammar`):
Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
of the text to match a regular expression or JSON schema.
Returns:
AsyncIterator[StreamResponse]: stream of generated tokens
@ -465,6 +741,7 @@ class AsyncClient:
do_sample=do_sample,
max_new_tokens=max_new_tokens,
repetition_penalty=repetition_penalty,
frequency_penalty=frequency_penalty,
return_full_text=return_full_text,
seed=seed,
stop=stop_sequences if stop_sequences is not None else [],
@ -475,6 +752,7 @@ class AsyncClient:
typical_p=typical_p,
watermark=watermark,
top_n_tokens=top_n_tokens,
grammar=grammar,
)
request = Request(inputs=prompt, stream=True, parameters=parameters)
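And a streaming generation sketch with the async client and a grammar constraint (prompt, regex, and URL are illustrative; the server must have grammar support enabled):

```python
import asyncio

from text_generation import AsyncClient
from text_generation.types import Grammar

async def main():
    client = AsyncClient("http://127.0.0.1:8080")
    text = ""
    # generate_stream is an async generator of StreamResponse objects.
    async for response in client.generate_stream(
        "Generate a fake email address:",
        max_new_tokens=20,
        grammar=Grammar(type="regex", value="[a-z]+@[a-z]+\\.com"),
    ):
        if not response.token.special:
            text += response.token.text
    print(text)

asyncio.run(main())
```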

View File

@ -1,10 +1,147 @@
from enum import Enum
from pydantic import BaseModel, validator
from typing import Optional, List
from pydantic import BaseModel, field_validator
from typing import Optional, List, Union, Any
from text_generation.errors import ValidationError
# enum for grammar type
class GrammarType(str, Enum):
Json = "json"
Regex = "regex"
# Grammar type and value
class Grammar(BaseModel):
# Grammar type
type: GrammarType
# Grammar value
value: Union[str, dict]
class ToolCall(BaseModel):
# Id of the tool call
id: int
# Type of the tool call
type: str
# Function details of the tool call
function: dict
class Message(BaseModel):
# Role of the message sender
role: str
# Content of the message
content: Optional[str] = None
# Optional name of the message sender
name: Optional[str] = None
# Tool calls associated with the chat completion
tool_calls: Optional[Any] = None
class Tool(BaseModel):
# Type of the tool
type: str
# Function details of the tool
function: dict
class ChatCompletionComplete(BaseModel):
# Index of the chat completion
index: int
# Message associated with the chat completion
message: Message
# Log probabilities for the chat completion
logprobs: Optional[Any]
# Reason for completion
finish_reason: str
# Usage details of the chat completion
usage: Optional[Any] = None
class Function(BaseModel):
name: Optional[str]
arguments: str
class ChoiceDeltaToolCall(BaseModel):
index: int
id: str
type: str
function: Function
class ChoiceDelta(BaseModel):
role: str
content: Optional[str] = None
tool_calls: Optional[ChoiceDeltaToolCall]
class Choice(BaseModel):
index: int
delta: ChoiceDelta
logprobs: Optional[dict] = None
finish_reason: Optional[str] = None
class ChatCompletionChunk(BaseModel):
id: str
object: str
created: int
model: str
system_fingerprint: str
choices: List[Choice]
class ChatComplete(BaseModel):
# Chat completion details
id: str
object: str
created: int
model: str
system_fingerprint: str
choices: List[ChatCompletionComplete]
usage: Any
class ChatRequest(BaseModel):
# Model identifier
model: str
# List of messages in the conversation
messages: List[Message]
# The parameter for repetition penalty. 1.0 means no penalty.
# See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
repetition_penalty: Optional[float] = None
# The parameter for frequency penalty. 0.0 means no penalty.
# Penalizes new tokens based on their existing frequency in the text so far,
# decreasing the model's likelihood to repeat the same line verbatim.
frequency_penalty: Optional[float] = None
# Bias values for token selection
logit_bias: Optional[List[float]] = None
# Whether to return log probabilities
logprobs: Optional[bool] = None
# Number of most likely tokens to return at each position
top_logprobs: Optional[int] = None
# Maximum number of tokens to generate
max_tokens: Optional[int] = None
# Number of chat completion choices to generate
n: Optional[int] = None
# Penalty for presence of new tokens
presence_penalty: Optional[float] = None
# Flag to indicate streaming response
stream: bool = False
# Random sampling seed
seed: Optional[int] = None
# Sampling temperature
temperature: Optional[float] = None
# Top-p value for nucleus sampling
top_p: Optional[float] = None
# List of tools to be used
tools: Optional[List[Tool]] = None
# Choice of tool to be used
tool_choice: Optional[str] = None
class Parameters(BaseModel):
# Activate logits sampling
do_sample: bool = False
@ -13,6 +150,10 @@ class Parameters(BaseModel):
# The parameter for repetition penalty. 1.0 means no penalty.
# See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
repetition_penalty: Optional[float] = None
# The parameter for frequency penalty. 0.0 means no penalty.
# Penalizes new tokens based on their existing frequency in the text so far,
# decreasing the model's likelihood to repeat the same line verbatim.
frequency_penalty: Optional[float] = None
# Whether to prepend the prompt to the generated text
return_full_text: bool = False
# Stop generating tokens if a member of `stop_sequences` is generated
@ -41,74 +182,91 @@ class Parameters(BaseModel):
decoder_input_details: bool = False
# Return the N most likely tokens at each step
top_n_tokens: Optional[int] = None
# grammar to use for generation
grammar: Optional[Grammar] = None
@validator("best_of")
@field_validator("best_of")
def valid_best_of(cls, field_value, values):
if field_value is not None:
if field_value <= 0:
raise ValidationError("`best_of` must be strictly positive")
if field_value > 1 and values["seed"] is not None:
if field_value > 1 and values.data["seed"] is not None:
raise ValidationError("`seed` must not be set when `best_of` is > 1")
sampling = (
values["do_sample"]
| (values["temperature"] is not None)
| (values["top_k"] is not None)
| (values["top_p"] is not None)
| (values["typical_p"] is not None)
values.data["do_sample"]
| (values.data["temperature"] is not None)
| (values.data["top_k"] is not None)
| (values.data["top_p"] is not None)
| (values.data["typical_p"] is not None)
)
if field_value > 1 and not sampling:
raise ValidationError("you must use sampling when `best_of` is > 1")
return field_value
@validator("repetition_penalty")
@field_validator("repetition_penalty")
def valid_repetition_penalty(cls, v):
if v is not None and v <= 0:
raise ValidationError("`repetition_penalty` must be strictly positive")
return v
@validator("seed")
@field_validator("frequency_penalty")
def valid_frequency_penalty(cls, v):
if v is not None and v <= 0:
raise ValidationError("`frequency_penalty` must be strictly positive")
return v
@field_validator("seed")
def valid_seed(cls, v):
if v is not None and v < 0:
raise ValidationError("`seed` must be positive")
return v
@validator("temperature")
@field_validator("temperature")
def valid_temp(cls, v):
if v is not None and v <= 0:
raise ValidationError("`temperature` must be strictly positive")
return v
@validator("top_k")
@field_validator("top_k")
def valid_top_k(cls, v):
if v is not None and v <= 0:
raise ValidationError("`top_k` must be strictly positive")
return v
@validator("top_p")
@field_validator("top_p")
def valid_top_p(cls, v):
if v is not None and (v <= 0 or v >= 1.0):
raise ValidationError("`top_p` must be > 0.0 and < 1.0")
return v
@validator("truncate")
@field_validator("truncate")
def valid_truncate(cls, v):
if v is not None and v <= 0:
raise ValidationError("`truncate` must be strictly positive")
return v
@validator("typical_p")
@field_validator("typical_p")
def valid_typical_p(cls, v):
if v is not None and (v <= 0 or v >= 1.0):
raise ValidationError("`typical_p` must be > 0.0 and < 1.0")
return v
@validator("top_n_tokens")
@field_validator("top_n_tokens")
def valid_top_n_tokens(cls, v):
if v is not None and v <= 0:
raise ValidationError("`top_n_tokens` must be strictly positive")
return v
@field_validator("grammar")
def valid_grammar(cls, v):
if v is not None:
if v.type == GrammarType.Regex and not v.value:
raise ValidationError("`value` cannot be empty for `regex` grammar")
if v.type == GrammarType.Json and not v.value:
raise ValidationError("`value` cannot be empty for `json` grammar")
return v
class Request(BaseModel):
# Prompt
@ -118,15 +276,15 @@ class Request(BaseModel):
# Whether to stream output tokens
stream: bool = False
@validator("inputs")
@field_validator("inputs")
def valid_input(cls, v):
if not v:
raise ValidationError("`inputs` cannot be empty")
return v
@validator("stream")
@field_validator("stream")
def valid_best_of_stream(cls, field_value, values):
parameters = values["parameters"]
parameters = values.data["parameters"]
if (
parameters is not None
and parameters.best_of is not None
@ -157,7 +315,7 @@ class Token(BaseModel):
# Token text
text: str
# Logprob
logprob: float
logprob: Optional[float] = None
# Is the token a special token
# Can be used to ignore tokens when concatenating
special: bool
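A small sketch of how these pydantic v2 validators behave at construction time; the schema and values are illustrative, and the error message comes from the `valid_grammar` validator above:

```python
from text_generation.errors import ValidationError
from text_generation.types import Grammar, Parameters

# A JSON grammar with a non-empty schema passes validation.
params = Parameters(
    max_new_tokens=32,
    grammar=Grammar(
        type="json",
        value={"type": "object", "properties": {"name": {"type": "string"}}},
    ),
)

# An empty regex grammar is rejected by the `valid_grammar` field validator.
try:
    Parameters(grammar=Grammar(type="regex", value=""))
except ValidationError as err:
    print(err)  # `value` cannot be empty for `regex` grammar
```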

View File

@ -27,4 +27,4 @@
}
</script>
</body>
</html>
</html>

File diff suppressed because it is too large

View File

@ -7,6 +7,8 @@
title: Installation
- local: supported_models
title: Supported Models and Hardware
- local: messages_api
title: Messages API
title: Getting started
- sections:
- local: basic_tutorials/consuming_tgi
@ -21,6 +23,8 @@
title: All TGI CLI options
- local: basic_tutorials/non_core_models
title: Non-core Model Serving
- local: basic_tutorials/safety
title: Safety
title: Tutorials
- sections:
- local: conceptual/streaming
@ -35,4 +39,8 @@
title: Safetensors
- local: conceptual/flash_attention
title: Flash Attention
- local: conceptual/speculation
title: Speculation (Medusa, ngram)
- local: conceptual/guidance
title: Guidance, JSON, tools (using outlines)
title: Conceptual Guides

View File

@ -23,7 +23,7 @@ You can simply install `huggingface-hub` package with pip.
pip install huggingface-hub
```
Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python.
Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python.
```python
from huggingface_hub import InferenceClient
@ -83,8 +83,8 @@ Gradio is a Python library that helps you build web applications for your machin
pip install huggingface-hub gradio
```
Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client).
Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client).
```python
import gradio as gr
from huggingface_hub import InferenceClient
@ -110,30 +110,30 @@ gr.ChatInterface(
).queue().launch()
```
The UI looks like this 👇
The UI looks like this 👇
<div class="flex justify-center">
<img
class="block dark:hidden"
<img
class="block dark:hidden"
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/gradio-tgi.png"
/>
<img
class="hidden dark:block"
<img
class="hidden dark:block"
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/gradio-tgi-dark.png"
/>
</div>
You can try the demo directly here 👇
You can try the demo directly here 👇
<div class="block dark:hidden">
<iframe
<iframe
src="https://merve-gradio-tgi-2.hf.space?__theme=light"
width="850"
height="750"
></iframe>
</div>
<div class="hidden dark:block">
<iframe
<iframe
src="https://merve-gradio-tgi-2.hf.space?__theme=dark"
width="850"
height="750"
@ -152,4 +152,4 @@ You can read more about how to customize a `ChatInterface` [here](https://www.gr
## API documentation
You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route. The Swagger UI is also available [here](https://huggingface.github.io/text-generation-inference).
You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route. The Swagger UI is also available [here](https://huggingface.github.io/text-generation-inference).

View File

@ -19,6 +19,6 @@ docker run --gpus all \
--shm-size 1g \
-e HUGGING_FACE_HUB_TOKEN=$token \
-p 8080:80 \
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 \
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 \
--model-id $model
```

View File

@ -60,12 +60,21 @@ Options:
[env: QUANTIZE=]
Possible values:
- awq: 4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=awq. Should replace GPTQ models whereever possible because of the better latency
- eetq: 8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from https://github.com/NetEase-FuXi/EETQ.git
- gptq: 4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq. text-generation-inference will use exllama (faster) kernels whereever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
- awq: 4 bit quantization. Requires a specific AWQ quantized model: <https://hf.co/models?search=awq>. Should replace GPTQ models wherever possible because of the better latency
- eetq: 8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from <https://github.com/NetEase-FuXi/EETQ.git>
- gptq: 4 bit quantization. Requires a specific GPTQ quantized model: <https://hf.co/models?search=gptq>. text-generation-inference will use exllama (faster) kernels wherever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
- bitsandbytes: Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half, but it is known that the model will be much slower to run than the native f16
- bitsandbytes-nf4: Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x, but it is known that the model will be much slower to run than the native f16
- bitsandbytes-fp4: Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better perplexity performance for your model
- fp8: [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above. This dtype has native ops and should be the fastest if available. This is currently not the fastest because of local unpacking + padding to satisfy matrix multiplication limitations
```
## SPECULATE
```shell
--speculate <SPECULATE>
The number of input_ids to speculate on. If using a medusa model, the heads will be picked up automatically. Otherwise, it will use n-gram speculation, which is relatively free in terms of compute, but the speedup heavily depends on the task
[env: SPECULATE=]
```
## DTYPE
@ -120,23 +129,29 @@ Options:
[env: MAX_TOP_N_TOKENS=]
[default: 5]
```
## MAX_INPUT_TOKENS
```shell
--max-input-tokens <MAX_INPUT_TOKENS>
This is the maximum allowed input length (expressed in number of tokens) for users. The larger this value, the longer prompt users can send which can impact the overall memory required to handle the load. Please note that some models have a finite range of sequence they can handle. Default to min(max_position_embeddings - 1, 4095)
[env: MAX_INPUT_TOKENS=]
```
## MAX_INPUT_LENGTH
```shell
--max-input-length <MAX_INPUT_LENGTH>
This is the maximum allowed input length (expressed in number of tokens) for users. The larger this value, the longer prompt users can send which can impact the overall memory required to handle the load. Please note that some models have a finite range of sequence they can handle
Legacy version of [`Args::max_input_tokens`]
[env: MAX_INPUT_LENGTH=]
[default: 1024]
```
## MAX_TOTAL_TOKENS
```shell
--max-total-tokens <MAX_TOTAL_TOKENS>
This is the most important value to set as it defines the "memory budget" of running clients requests. Clients will send input sequences and ask to generate `max_new_tokens` on top. with a value of `1512` users can send either a prompt of `1000` and ask for `512` new tokens, or send a prompt of `1` and ask for `1511` max_new_tokens. The larger this value, the larger amount each request will be in your RAM and the less effective batching can be
This is the most important value to set as it defines the "memory budget" of running client requests. Clients will send input sequences and ask to generate `max_new_tokens` on top. With a value of `1512`, users can send either a prompt of `1000` and ask for `512` new tokens, or send a prompt of `1` and ask for `1511` max_new_tokens. The larger this value, the larger amount each request will be in your RAM and the less effective batching can be. Default to min(max_position_embeddings, 4096)
[env: MAX_TOTAL_TOKENS=]
[default: 2048]
```
## WAITING_SERVED_RATIO
@ -153,10 +168,9 @@ Options:
## MAX_BATCH_PREFILL_TOKENS
```shell
--max-batch-prefill-tokens <MAX_BATCH_PREFILL_TOKENS>
Limits the number of tokens for the prefill operation. Since this operation take the most memory and is compute bound, it is interesting to limit the number of requests that can be sent
Limits the number of tokens for the prefill operation. Since this operation takes the most memory and is compute bound, it is interesting to limit the number of requests that can be sent. Default to `max_input_tokens + 50` to give a bit of room
[env: MAX_BATCH_PREFILL_TOKENS=]
[default: 4096]
```
## MAX_BATCH_TOTAL_TOKENS
@ -189,6 +203,22 @@ Options:
[env: MAX_WAITING_TOKENS=]
[default: 20]
```
## MAX_BATCH_SIZE
```shell
--max-batch-size <MAX_BATCH_SIZE>
Enforce a maximum number of requests per batch. Specific flag for hardware targets that do not support unpadded inference
[env: MAX_BATCH_SIZE=]
```
## CUDA_GRAPHS
```shell
--cuda-graphs <CUDA_GRAPHS>
Specify the batch sizes to compute cuda graphs for. Use "0" to disable. Default = "1,2,4,8,16,32"
[env: CUDA_GRAPHS=]
```
## HOSTNAME
```shell
@ -346,6 +376,22 @@ Options:
[env: NGROK_EDGE=]
```
## TOKENIZER_CONFIG_PATH
```shell
--tokenizer-config-path <TOKENIZER_CONFIG_PATH>
The path to the tokenizer config file. This path is used to load the tokenizer configuration which may include a `chat_template`. If not provided, the default config will be used from the model hub
[env: TOKENIZER_CONFIG_PATH=]
```
## DISABLE_GRAMMAR_SUPPORT
```shell
--disable-grammar-support
Disable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar
[env: DISABLE_GRAMMAR_SUPPORT=]
```
## ENV
```shell

View File

@ -2,19 +2,19 @@
TGI supports various LLM architectures (see full list [here](../supported_models)). If you wish to serve a model that is not one of the supported models, TGI will fallback to the `transformers` implementation of that model. This means you will be unable to use some of the features introduced by TGI, such as tensor-parallel sharding or flash attention. However, you can still get many benefits of TGI, such as continuous batching or streaming outputs.
You can serve these models using the same Docker command-line invocation as with fully supported models 👇
You can serve these models using the same Docker command-line invocation as with fully supported models 👇
```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id gpt2
```
If the model you wish to serve is a custom transformers model, and its weights and implementation are available in the Hub, you can still serve the model by passing the `--trust-remote-code` flag to the `docker run` command like below 👇
If the model you wish to serve is a custom transformers model, and its weights and implementation are available in the Hub, you can still serve the model by passing the `--trust-remote-code` flag to the `docker run` command like below 👇
```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id <CUSTOM_MODEL_ID> --trust-remote-code
```
Finally, if the model is not on Hugging Face Hub but stored locally, you can pass the path to the folder that contains your model like below 👇
```bash
# Make sure your model is in the $volume directory

View File

@ -1,6 +1,6 @@
# Preparing the Model
Text Generation Inference improves the model in several aspects.
## Quantization
@ -9,7 +9,7 @@ TGI supports [bits-and-bytes](https://github.com/TimDettmers/bitsandbytes#bitsan
## RoPE Scaling
RoPE scaling can be used to increase the sequence length of the model during inference without necessarily fine-tuning it. To enable RoPE scaling, simply pass `--rope-scaling`, `--max-input-length` and `--rope-factor` flags when running through CLI. `--rope-scaling` can take the values `linear` or `dynamic`. If your model is not fine-tuned to a longer sequence length, use `dynamic`. `--rope-factor` is the ratio between the intended max sequence length and the model's original max sequence length. Make sure to pass `--max-input-length` to provide the maximum input length for extension.
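As a rough sketch (the scaling factor and length below are illustrative values, not recommendations), the flags can be combined in the usual Docker invocation:

```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
    ghcr.io/huggingface/text-generation-inference:latest \
    --model-id $model --rope-scaling dynamic --rope-factor 2.0 --max-input-length 4096
```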
<Tip>
@ -19,4 +19,4 @@ We recommend using `dynamic` RoPE scaling.
## Safetensors
[Safetensors](https://github.com/huggingface/safetensors) is a fast and safe persistence format for deep learning models, and is required for tensor parallelism. TGI supports `safetensors` model loading under the hood. By default, given a repository with `safetensors` and `pytorch` weights, TGI will always load `safetensors`. If there's no `pytorch` weights, TGI will convert the weights to `safetensors` format.

View File

@ -0,0 +1,31 @@
# Model safety.
[PyTorch uses pickle](https://pytorch.org/docs/master/generated/torch.load.html) by default, which means that for quite a long while
*every* model using that format could potentially execute unintended code simply by being loaded.
There is a big red warning on Python's page for pickle ([link](https://docs.python.org/3/library/pickle.html)), but for quite a while
it was ignored by the community. Now that AI/ML is used much more ubiquitously, we need to move away from this format.
Hugging Face is leading the effort here by creating a new format that contains pure data ([safetensors](https://github.com/huggingface/safetensors))
and by slowly but surely moving all the libraries to use it by default.
The move is intentionally slow in order to keep the impact of breaking changes on users as small as possible throughout.
# TGI 2.0
With the release of TGI 2.0, we take the opportunity of this major version increase to break backward compatibility for these PyTorch
models (since they are a huge security risk for anyone deploying them).
From now on, TGI will not automatically convert pickle files unless the `--trust-remote-code` flag is passed or `TRUST_REMOTE_CODE=true` is set in the environment variables.
This flag is already used for community-defined inference code, and is therefore quite representative of the level of confidence you are giving the model providers.
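For illustration, a minimal sketch of the two equivalent ways to opt in (the model id is a placeholder):

```
docker run .... --model-id $model --trust-remote-code
# or set TRUST_REMOTE_CODE=true in the environment
```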
If you want to use a model that uses pickle, but you still do not want to trust the authors entirely, we recommend making a conversion on our Space made for that purpose:
https://huggingface.co/spaces/safetensors/convert
This Space will create a PR on the original model, which you can use directly regardless of the merge status from the original authors. Just use
```
docker run .... --revision refs/pr/#ID # Or use REVISION=refs/pr/#ID in the environment
```

View File

@ -1,30 +1,30 @@
# Using TGI CLI
You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](./installation#install-cli).
You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](../installation#install-cli).
`text-generation-server` lets you download the model with `download-weights` command like below 👇
```bash
text-generation-server download-weights MODEL_HUB_ID
```
You can also use it to quantize models like below 👇
```bash
text-generation-server quantize MODEL_HUB_ID OUTPUT_DIR
```
You can use `text-generation-launcher` to serve models.
```bash
text-generation-launcher --model-id MODEL_HUB_ID --port 8080
```
There are many options and parameters you can pass to `text-generation-launcher`. The documentation for CLI is kept minimal and intended to rely on self-generating documentation, which can be found by running
```bash
text-generation-launcher --help
```
You can also find it hosted in this [Swagger UI](https://huggingface.github.io/text-generation-inference/).

View File

@ -1,12 +1,11 @@
# Flash Attention
Scaling the transformer architecture is heavily bottlenecked by the self-attention mechanism, which has quadratic time and memory complexity. Recent developments in accelerator hardware mainly focus on enhancing compute capacities and not memory and transferring data between hardware. This results in attention operation having a memory bottleneck. **Flash Attention** is an attention algorithm used to reduce this problem and scale transformer-based models more efficiently, enabling faster training and inference.
The standard attention mechanism uses High Bandwidth Memory (HBM) to store, read and write keys, queries and values. HBM is large in capacity but slow to access, whereas SRAM is smaller but much faster. In the standard attention implementation, the cost of loading and writing keys, queries, and values from HBM is high: they are loaded from HBM into the GPU's on-chip SRAM, a single step of the attention mechanism is performed, the result is written back to HBM, and this is repeated for every attention step. Instead, Flash Attention loads keys, queries, and values once, fuses the operations of the attention mechanism, and writes them back.
![Flash Attention](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/flash-attn.png)
It is implemented for supported models. You can check out the complete list of models that support Flash Attention [here](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models); look for the models with the `flash` prefix.
You can learn more about Flash Attention by reading the paper in this [link](https://arxiv.org/abs/2205.14135).

View File

@ -0,0 +1,419 @@
# Guidance
Text Generation Inference (TGI) now supports [JSON and regex grammars](#grammar-and-constraints) and [tools and functions](#tools-and-functions) to help developer guide LLM responses to fit their needs.
These features are available starting from version `1.4.3`. They are accessible via the [text_generation](https://pypi.org/project/text-generation/) library and are compatible with OpenAI's client libraries. The following guide will walk you through the new features and how to use them!
## Quick Start
Before we jump into the deep end, ensure your system is using TGI version `1.4.3` or later to access all the features we're about to explore in this guide.
If you're not up to date, grab the latest version and let's get started!
## Table of Contents 📚
### Grammar and Constraints
- [The Grammar Parameter](#the-grammar-parameter): Shape your AI's responses with precision.
- [Constrain with Pydantic](#constrain-with-pydantic): Define a grammar using Pydantic models.
- [JSON Schema Integration](#json-schema-integration): Fine grain control over your requests via JSON schema.
- [Using the client](#using-the-client): Use TGI's client libraries to shape the AI's responses.
### Tools and Functions
- [The Tools Parameter](#the-tools-parameter): Enhance the AI's capabilities with predefined functions.
- [Via the client](#text-generation-inference-client): Use TGI's client libraries to interact with the Messages API and Tool functions.
- [OpenAI integration](#openai-integration): Use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
## Grammar and Constraints 🛣️
### The Grammar Parameter
In TGI `1.4.3`, we've introduced the grammar parameter, which allows you to specify the format of the response you want from the AI. This is a game-changer for those who need precise control over the AI's output.
Using curl, you can make a request to TGI's Messages API with the grammar parameter. This is the most primitive way to interact with the API and using [Pydantic](#constrain-with-pydantic) is recommended for ease of use and readability.
```json
curl localhost:3000/generate \
-X POST \
-H 'Content-Type: application/json' \
-d '{
"inputs": "I saw a puppy a cat and a raccoon during my bike ride in the park",
"parameters": {
"repetition_penalty": 1.3,
"grammar": {
"type": "json",
"value": {
"properties": {
"location": {
"type": "string"
},
"activity": {
"type": "string"
},
"animals_seen": {
"type": "integer",
"minimum": 1,
"maximum": 5
},
"animals": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": ["location", "activity", "animals_seen", "animals"]
}
}
}
}'
// {"generated_text":"{ \n\n\"activity\": \"biking\",\n\"animals\": [\"puppy\",\"cat\",\"raccoon\"],\n\"animals_seen\": 3,\n\"location\": \"park\"\n}"}
```
A grammar can be defined using Pydantic models, JSON schema, or regular expressions. The AI will then generate a response that conforms to the specified grammar.
> Note: A grammar must compile to an intermediate representation to constrain the output. Grammar compilation is computationally expensive and may take a few seconds to complete on the first request. Subsequent requests will use the cached grammar and will be much faster.
### Constrain with Pydantic
Pydantic is a powerful library for data validation and settings management. It's the perfect tool for crafting a specific response format.
Using Pydantic models we can define a similar grammar as the previous example in a shorter and more readable way.
```python
import requests
from pydantic import BaseModel, conint
from typing import List
class Animals(BaseModel):
location: str
activity: str
animals_seen: conint(ge=1, le=5) # Constrained integer type
animals: List[str]
prompt = "convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park"
data = {
"inputs": prompt,
"parameters": {
"repetition_penalty": 1.3,
"grammar": {
"type": "json",
"value": Animals.schema()
}
}
}
headers = {
"Content-Type": "application/json",
}
response = requests.post(
'http://127.0.0.1:3000/generate',
headers=headers,
json=data
)
print(response.json())
# {'generated_text': '{ "activity": "bike riding", "animals": ["puppy","cat","raccoon"],"animals_seen": 3, "location":"park" }'}
```
### JSON Schema Integration
If Pydantic's not your style, go raw with direct JSON Schema integration. It's like having a conversation with the AI in its own language. This is similar to the first example but with programmatic control.
```python
import requests
json_schema = {
"properties": {
"location": {
"type": "string"
},
"activity": {
"type": "string"
},
"animals_seen": {
"type": "integer",
"minimum": 1,
"maximum": 5
},
"animals": {
"type": "array",
"items": {
"type": "string"
}
}
},
"required": ["location", "activity", "animals_seen", "animals"]
}
data = {
"inputs": "[INST]convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park [/INST]",
"parameters": {
"max_new_tokens": 200,
"repetition_penalty": 1.3,
"grammar": {
"type": "json",
"value": json_schema
}
}
}
headers = {
"Content-Type": "application/json",
}
response = requests.post(
'http://127.0.0.1:3000/generate',
headers=headers,
json=data
)
print(response.json())
# {'generated_text': '{\n"activity": "biking",\n"animals": ["puppy","cat","raccoon"]\n , "animals_seen": 3,\n "location":"park"}'}
```
### Using the client
TGI provides a client library that makes it easy to send requests with all of the parameters we've discussed above. Here's an example of how to use the client to send a request with a grammar parameter.
```python
from text_generation import AsyncClient
from text_generation.types import GrammarType
# NOTE: tools defined above and removed for brevity
# Define an async function to encapsulate the async operation
async def main():
client = AsyncClient(base_url="http://localhost:3000")
# Use 'await' to wait for the async method 'chat' to complete
response = await client.generate(
"Whats Googles DNS",
max_new_tokens=10,
decoder_input_details=True,
seed=1,
grammar={
"type": GrammarType.Regex,
"value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
},
)
# Once the response is received, you can process it
print(response.generated_text)
# Ensure the main async function is run in the event loop
if __name__ == "__main__":
import asyncio
asyncio.run(main())
# 118.8.0.84
```
## Tools and Functions 🛠️
### The Tools Parameter
In addition to the grammar parameter, we've also introduced a set of tools and functions to help you get the most out of the Messages API.
Tools are a set of user defined functions that can be used in tandem with the chat functionality to enhance the AI's capabilities. You can use these tools to perform a variety of tasks, such as data manipulation, formatting, and more.
Functions, similar to grammars, are defined as JSON schemas and can be passed as part of the parameters to the Messages API.
```json
curl localhost:3000/v1/chat/completions \
-X POST \
-H 'Content-Type: application/json' \
-d '{
"model": "tgi",
"messages": [
{
"role": "user",
"content": "What is the weather like in New York?"
}
],
"tools": [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA"
},
"format": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "The temperature unit to use. Infer this from the users location."
}
},
"required": ["location", "format"]
}
}
}
],
"tool_choice": "get_current_weather"
}'
// {"id":"","object":"text_completion","created":1709051640,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":19,"total_tokens":176}}
```
<details>
<summary>Tools used in example below</summary>
```python
tools = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"format": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "The temperature unit to use. Infer this from the users location.",
},
},
"required": ["location", "format"],
},
},
},
{
"type": "function",
"function": {
"name": "get_n_day_weather_forecast",
"description": "Get an N-day weather forecast",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"format": {
"type": "string",
"enum": ["celsius", "fahrenheit"],
"description": "The temperature unit to use. Infer this from the users location.",
},
"num_days": {
"type": "integer",
"description": "The number of days to forecast",
},
},
"required": ["location", "format", "num_days"],
},
},
}
]
```
</details>
### Text Generation Inference Client
TGI provides a client library to interact with the Messages API and Tool functions. The client library is available in both synchronous and asynchronous versions.
```python
from text_generation import AsyncClient
# NOTE: tools defined above and removed for brevity
# Define an async function to encapsulate the async operation
async def main():
client = AsyncClient(base_url="http://localhost:3000")
# Use 'await' to wait for the async method 'chat' to complete
response = await client.chat(
max_tokens=100,
seed=1,
tools=tools,
presence_penalty=-1.1,
messages=[
{
"role": "system",
"content": "You're a helpful assistant! Answer the users question best you can.",
},
{
"role": "user",
"content": "What is the weather like in Brooklyn, New York?",
},
],
)
# Once the response is received, you can process it
print(response.choices[0].message.tool_calls)
# Ensure the main async function is run in the event loop
if __name__ == "__main__":
import asyncio
asyncio.run(main())
# {"id":"","object":"text_completion","created":1709051942,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":20,"total_tokens":177}}
```
### OpenAI integration
TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
However, there are some minor differences in the API; for example, `tool_choice="auto"` will ALWAYS choose the tool for you. This is different from OpenAI's API, where `tool_choice="auto"` will choose a tool if the model thinks it's necessary.
```python
from openai import OpenAI
# Initialize the client, pointing it to one of the available models
client = OpenAI(
base_url="http://localhost:3000/v1",
api_key="_",
)
# NOTE: tools defined above and removed for brevity
chat_completion = client.chat.completions.create(
model="tgi",
messages=[
{
"role": "system",
"content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
},
{
"role": "user",
"content": "What's the weather like the next 3 days in San Francisco, CA?",
},
],
tools=tools,
tool_choice="auto", # tool selected by model
max_tokens=500,
)
called = chat_completion.choices[0].message.tool_calls
print(called)
# {
# "id": 0,
# "type": "function",
# "function": {
# "description": None,
# "name": "tools",
# "parameters": {
# "format": "celsius",
# "location": "San Francisco, CA",
# "num_days": 3,
# },
# },
# }
```

View File

@ -4,20 +4,20 @@ TGI offers GPTQ and bits-and-bytes quantization to quantize large language model
## Quantization with GPTQ
GPTQ is a post-training quantization method to make the model smaller. It quantizes the layers by finding a compressed version of that weight, that will yield a minimum mean squared error like below 👇
Given a layer \\(l\\) with weight matrix \\(W_{l}\\) and layer input \\(X_{l}\\), find quantized weight \\(\\hat{W}_{l}\\):
$$\hat{W}_{l}^{*} = \mathrm{argmin}_{\hat{W}_{l}} \lVert W_{l}X_{l}-\hat{W}_{l}X_{l} \rVert^{2}_{2}$$
TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using quantization script. You can run a quantized model by simply passing --quantize like below 👇
```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize gptq
```
Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI.
To quantize a given model using GPTQ with a calibration dataset, simply run
@ -41,7 +41,7 @@ You can learn more about GPTQ from the [paper](https://arxiv.org/pdf/2210.17323.
bitsandbytes is a library used to apply 8-bit and 4-bit quantization to models. Unlike GPTQ quantization, bitsandbytes doesn't require a calibration dataset or any post-processing: weights are automatically quantized on load. However, inference with bitsandbytes is slower than GPTQ or FP16 precision.
8-bit quantization enables multi-billion parameter scale models to fit in smaller hardware without degrading performance too much.
In TGI, you can use 8-bit quantization by adding `--quantize bitsandbytes` like below 👇
```bash
@ -50,7 +50,7 @@ docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingf
4-bit quantization is also possible with bitsandbytes. You can choose one of the following 4-bit data types: 4-bit float (`fp4`), or 4-bit `NormalFloat` (`nf4`). These data types were introduced in the context of parameter-efficient fine-tuning, but you can apply them for inference by automatically converting the model weights on load.
In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇
```bash
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize bitsandbytes-nf4

View File

@ -1,7 +1,7 @@
# Safetensors
Safetensors is a model serialization format for deep learning models. It is [faster](https://huggingface.co/docs/safetensors/speed) and safer compared to other serialization formats like pickle (which is used under the hood in many deep learning libraries).
TGI depends on safetensors format mainly to enable [tensor parallelism sharding](./tensor_parallelism). For a given model repository during serving, TGI looks for safetensors weights. If there are no safetensors weights, TGI converts the PyTorch weights to safetensors format.
You can learn more about safetensors by reading the [safetensors documentation](https://huggingface.co/docs/safetensors/index).

View File

@ -0,0 +1,48 @@
## Speculation
Speculative decoding, assisted generation, Medusa, and others are a few different names for the same idea.
The idea is to generate tokens *before* the large model actually runs, and only *check* if those tokens were valid.
So you are doing *more* computation on your LLM, but if your guesses are correct you produce 1, 2, 3, etc. tokens in a single LLM pass. Since LLMs are usually memory bound (and not compute bound), provided your guesses are correct enough, this yields 2-3x faster inference (it can be much more for code-oriented tasks, for instance).
You can check a more [detailed explanation](https://huggingface.co/blog/assisted-generation).
Text-generation inference supports 2 main speculative methods:
- Medusa
- N-gram
### Medusa
Medusa is a [simple method](https://arxiv.org/abs/2401.10774) to create many tokens in a single pass using fine-tuned LM heads in addition to your existing models.
You can check a few existing fine-tunes for popular models:
- [text-generation-inference/gemma-7b-it-medusa](https://huggingface.co/text-generation-inference/gemma-7b-it-medusa)
- [text-generation-inference/Mixtral-8x7B-Instruct-v0.1-medusa](https://huggingface.co/text-generation-inference/Mixtral-8x7B-Instruct-v0.1-medusa)
- [text-generation-inference/Mistral-7B-Instruct-v0.2-medusa](https://huggingface.co/text-generation-inference/Mistral-7B-Instruct-v0.2-medusa)
In order to create your own medusa heads for your own fine-tune, you should check out the original medusa repo: [https://github.com/FasterDecoding/Medusa](https://github.com/FasterDecoding/Medusa)
In order to use medusa models in TGI, simply point to a medusa-enabled model, and everything will load automatically.
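For example, a minimal sketch using one of the fine-tunes listed above (the Docker invocation mirrors the one used elsewhere in these docs):

```bash
model=text-generation-inference/Mistral-7B-Instruct-v0.2-medusa
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
    ghcr.io/huggingface/text-generation-inference:latest --model-id $model
```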
### N-gram
If you don't have a medusa model, or don't have the resources to fine-tune, you can try to use `n-gram`.
N-gram speculation works by trying to find matching tokens in the previous sequence, and using those as speculation.
This is an extremely simple method that works best for code or highly repetitive text. It might not be beneficial if the speculation misses too often.
In order to enable n-gram speculation, simply use
`--speculate 2` in your flags.
[Details about the flag](https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher#speculate)
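For instance, a rough sketch of enabling it at launch (the model id is a placeholder):

```bash
text-generation-launcher --model-id $model --speculate 2
```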

View File

@ -5,12 +5,12 @@
Token streaming is the mode in which the server returns the tokens one by one as the model generates them. This enables showing progressive generations to the user rather than waiting for the whole generation. Streaming is an essential aspect of the end-user experience as it reduces latency, one of the most critical aspects of a smooth experience.
<div class="flex justify-center">
    <img
        class="block dark:hidden"
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/streaming-generation-visual_360.gif"
/>
    <img
        class="hidden dark:block"
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/streaming-generation-visual-dark_360.gif"
/>
</div>
@ -25,14 +25,14 @@ With token streaming, the server can start returning the tokens one by one befor
For example, a system can generate 100 tokens per second. If the system generates 1000 tokens, with the non-streaming setup, users need to wait 10 seconds to get results. On the other hand, with the streaming setup, users get initial results immediately, and although end-to-end latency will be the same, they can see half of the generation after five seconds. Below you can see an interactive demo that shows non-streaming vs streaming side-by-side. Click **generate** below.
<div class="block dark:hidden">
    <iframe
src="https://osanseviero-streaming-vs-non-streaming.hf.space?__theme=light"
width="850"
height="350"
></iframe>
</div>
<div class="hidden dark:block">
    <iframe
src="https://osanseviero-streaming-vs-non-streaming.hf.space?__theme=dark"
width="850"
height="350"
@ -43,7 +43,7 @@ For example, a system can generate 100 tokens per second. If the system generate
### Streaming with Python
To stream tokens with `InferenceClient`, simply pass `stream=True` and iterate over the response.
```python
from huggingface_hub import InferenceClient
@ -116,7 +116,7 @@ curl -N 127.0.0.1:8080/generate_stream \
First, we need to install the `@huggingface/inference` library.
`npm install @huggingface/inference`
If you're using the free Inference API, you can use `HfInference`. If you're using inference endpoints, you can use `HfInferenceEndpoint`.
We can create a `HfInferenceEndpoint` providing our endpoint URL and credential.
@ -129,7 +129,7 @@ const hf = new HfInferenceEndpoint('https://YOUR_ENDPOINT.endpoints.huggingface.
const prompt = 'What can you do in Nuremberg, Germany? Give me 3 Tips'
const stream = hf.textGenerationStream({ inputs: prompt })
for await (const r of stream) {
// yield the generated token
process.stdout.write(r.token.text)
}

View File

@ -1,6 +1,6 @@
# Tensor Parallelism
Tensor parallelism is a technique used to fit a large model in multiple GPUs. For example, when multiplying the input tensors with the first weight tensor, the matrix multiplication is equivalent to splitting the weight tensor column-wise, multiplying each column with the input separately, and then concatenating the separate outputs. These outputs are then transferred from the GPUs and concatenated together to get the final result, like below 👇
![Image courtesy of Anton Lozkhov](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/TP.png)
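To make the equivalence concrete, here is a minimal PyTorch sketch (purely illustrative; it is not how TGI implements sharding):

```python
import torch

x = torch.randn(2, 8)    # input activations
w = torch.randn(8, 6)    # full weight matrix

# Split the weight column-wise across two "GPUs"
w0, w1 = w.chunk(2, dim=1)

# Each shard multiplies the same input by its columns; the outputs are then concatenated
out = torch.cat([x @ w0, x @ w1], dim=1)

assert torch.allclose(out, x @ w)
```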

View File

@ -4,7 +4,7 @@ This section explains how to install the CLI tool as well as installing TGI from
## Install CLI
You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters.
To install the CLI, you need to first clone the TGI repository and then run `make`.
@ -23,7 +23,7 @@ BUILD_EXTENSIONS=True make install
Before you start, you will need to setup your environment, and install Text Generation Inference. Text Generation Inference is tested on **Python 3.9+**.
Text Generation Inference is available on PyPI, conda and GitHub.
To install and launch locally, first [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
Python 3.9, e.g. using conda:

175
docs/source/messages_api.md Normal file
View File

@ -0,0 +1,175 @@
# Messages API
Text Generation Inference (TGI) now supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. This feature is available starting from version 1.4.0. You can use OpenAI's client libraries or third-party libraries expecting OpenAI schema to interact with TGI's Messages API. Below are some examples of how to utilize this compatibility.
> **Note:** The Messages API is supported from TGI version 1.4.0 and above. Ensure you are using a compatible version to access this feature.
#### Table of Contents
- [Making a Request](#making-a-request)
- [Streaming](#streaming)
- [Synchronous](#synchronous)
- [Hugging Face Inference Endpoints](#hugging-face-inference-endpoints)
- [Cloud Providers](#cloud-providers)
- [Amazon SageMaker](#amazon-sagemaker)
## Making a Request
You can make a request to TGI's Messages API using `curl`. Here's an example:
```bash
curl localhost:3000/v1/chat/completions \
-X POST \
-d '{
"model": "tgi",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is deep learning?"
}
],
"stream": true,
"max_tokens": 20
}' \
-H 'Content-Type: application/json'
```
## Streaming
You can also use OpenAI's Python client library to make a streaming request. Here's how:
```python
from openai import OpenAI
# init the client but point it to TGI
client = OpenAI(
base_url="http://localhost:3000/v1",
api_key="-"
)
chat_completion = client.chat.completions.create(
model="tgi",
messages=[
{"role": "system", "content": "You are a helpful assistant." },
{"role": "user", "content": "What is deep learning?"}
],
stream=True
)
# iterate and print stream
for message in chat_completion:
print(message)
```
## Synchronous
If you prefer to make a synchronous request, you can do so like this:
```python
from openai import OpenAI
# init the client but point it to TGI
client = OpenAI(
base_url="http://localhost:3000/v1",
api_key="-"
)
chat_completion = client.chat.completions.create(
model="tgi",
messages=[
{"role": "system", "content": "You are a helpful assistant." },
{"role": "user", "content": "What is deep learning?"}
],
stream=False
)
print(chat_completion)
```
## Hugging Face Inference Endpoints
The Messages API is integrated with [Inference Endpoints](https://huggingface.co/inference-endpoints/dedicated).
Every endpoint that uses "Text Generation Inference" with an LLM that has a chat template can now be used. Below is an example of how to use IE with TGI using OpenAI's Python client library:
> **Note:** Make sure to replace `base_url` with your endpoint URL and to include `v1/` at the end of the URL. The `api_key` should be replaced with your Hugging Face API key.
```python
from openai import OpenAI
# init the client but point it to TGI
client = OpenAI(
# replace with your endpoint url, make sure to include "v1/" at the end
base_url="https://vlzz10eq3fol3429.us-east-1.aws.endpoints.huggingface.cloud/v1/",
# replace with your API key
api_key="hf_XXX"
)
chat_completion = client.chat.completions.create(
model="tgi",
messages=[
{"role": "system", "content": "You are a helpful assistant." },
{"role": "user", "content": "What is deep learning?"}
],
stream=True
)
# iterate and print stream
for message in chat_completion:
print(message.choices[0].delta.content, end="")
```
## Cloud Providers
TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
### Amazon SageMaker
To enable the Messages API in Amazon SageMaker, you need to set the environment variable `MESSAGES_API_ENABLED=true`.
This will modify the `/invocations` route to accept Messages dictionaries consisting of role and content. See the example below on how to deploy Llama with the new Messages API.
```python
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
try:
role = sagemaker.get_execution_role()
except ValueError:
iam = boto3.client('iam')
role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
# Hub Model configuration. https://huggingface.co/models
hub = {
'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
'SM_NUM_GPUS': json.dumps(1),
  'MESSAGES_API_ENABLED': json.dumps(True)
}
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
env=hub,
role=role,
)
# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
initial_instance_count=1,
instance_type="ml.g5.2xlarge",
container_startup_health_check_timeout=300,
)
# send request
predictor.predict({
"messages": [
{"role": "system", "content": "You are a helpful assistant." },
{"role": "user", "content": "What is deep learning?"}
]
})
```

View File

@ -2,23 +2,27 @@
The easiest way of getting started is using the official Docker container. Install Docker following [their installation instructions](https://docs.docker.com/get-docker/).
Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) model with TGI. Here is an example on how to do that:
Let's say you want to deploy [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) model with TGI. Here is an example on how to do that:
```bash
model=tiiuae/falcon-7b-instruct
model=teknium/OpenHermes-2.5-Mistral-7B
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
```
<Tip warning={true}>
To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) . We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
To use TGI on RoCm-enabled AMD GPUs (only MI210 and MI250 are tested), please use the image `ghcr.io/huggingface/text-generation-inference:1.2+rocm` instead. For details about the usage on RoCm, please refer to the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html).
To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher.
</Tip>
TGI also supports ROCm-enabled AMD GPUs (only MI210 and MI250 are tested), details are available in the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html). To launch TGI on ROCm GPUs, please use instead:
```bash
docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4-rocm --model-id $model
```
Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
@ -49,7 +53,7 @@ print(response.json())
```js
async function query() {
const response = await fetch(
'http://127.0.0.1:8080/generate',
{
method: 'POST',
headers: { 'Content-Type': 'application/json'},
@ -87,7 +91,7 @@ curl 127.0.0.1:8080/generate \
To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
```bash
docker run ghcr.io/huggingface/text-generation-inference:1.2 --help
docker run ghcr.io/huggingface/text-generation-inference:1.4 --help
```
</Tip>

View File

@ -19,7 +19,11 @@ The following models are optimized and can be served with TGI, which uses custom
- [MPT](https://huggingface.co/mosaicml/mpt-30b)
- [Llama V2](https://huggingface.co/meta-llama)
- [Code Llama](https://huggingface.co/codellama)
- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
- [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
- [Phi](https://huggingface.co/microsoft/phi-2)
- [Idefics](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct) (Multimodal)
- [Llava-next](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) (Multimodal)
If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:
@ -39,9 +43,13 @@ text-generation-launcher --model-id <PATH-TO-LOCAL-BLOOM>
## Supported Hardware
TGI optimized models are supported on NVIDIA [A100](https://www.nvidia.com/en-us/data-center/a100/), [A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) and [T4](https://www.nvidia.com/en-us/data-center/tesla-t4/) GPUs with CUDA 11.8+. Note that you have to install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to use it. For other NVIDIA GPUs, continuous batching will still apply, but some operations like flash attention and paged attention will not be executed.
TGI optimized models are supported on NVIDIA [A100](https://www.nvidia.com/en-us/data-center/a100/), [A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) and [T4](https://www.nvidia.com/en-us/data-center/tesla-t4/) GPUs with CUDA 12.2+. Note that you have to install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to use it. For other NVIDIA GPUs, continuous batching will still apply, but some operations like flash attention and paged attention will not be executed.
TGI also has support of RoCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention and flash attention v2 support. The following features are missing from the RoCm version of TGI: quantization and flash [layer norm kernel](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm).
TGI also supports ROCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention, GPTQ quantization, and flash attention v2 support. The following features are currently not supported in the ROCm version of TGI, and support may be extended in the future:
* Loading [AWQ](https://huggingface.co/docs/transformers/quantization#awq) checkpoints.
* Flash [layer norm kernel](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm)
* Kernel for sliding window attention (Mistral)
TGI is also supported on the following AI hardware accelerators:
- *Habana first-gen Gaudi and Gaudi2:* check out this [example](https://github.com/huggingface/optimum-habana/tree/main/text-generation-inference) how to serve models with TGI on Gaudi and Gaudi2 with [Optimum Habana](https://huggingface.co/docs/optimum/habana/index)
- *Habana first-gen Gaudi and Gaudi2:* check out this [repository](https://github.com/huggingface/tgi-gaudi) to serve models with TGI on Gaudi and Gaudi2 with [Optimum Habana](https://huggingface.co/docs/optimum/habana/index)
- *AWS Inferentia2:* check out this [guide](https://github.com/huggingface/optimum-neuron/tree/main/text-generation-inference) on how to serve models with TGI on Inferentia2.

View File

@ -16,7 +16,17 @@ from syrupy.extensions.json import JSONSnapshotExtension
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from text_generation import AsyncClient
from text_generation.types import Response, Details, InputToken, Token, BestOfSequence
from text_generation.types import (
Response,
Details,
InputToken,
Token,
BestOfSequence,
Grammar,
ChatComplete,
ChatCompletionChunk,
ChatCompletionComplete,
)
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None)
@ -25,6 +35,7 @@ DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
class ResponseComparator(JSONSnapshotExtension):
rtol = 0.2
def serialize(
self,
data,
@ -32,8 +43,16 @@ class ResponseComparator(JSONSnapshotExtension):
exclude=None,
matcher=None,
):
if (
isinstance(data, Response)
or isinstance(data, ChatComplete)
or isinstance(data, ChatCompletionChunk)
or isinstance(data, ChatCompletionComplete)
):
data = data.model_dump()
if isinstance(data, List):
data = [d.dict() for d in data]
data = [d.model_dump() for d in data]
data = self._filter(
data=data, depth=0, path=(), exclude=exclude, matcher=matcher
@ -48,6 +67,15 @@ class ResponseComparator(JSONSnapshotExtension):
) -> bool:
def convert_data(data):
data = json.loads(data)
if isinstance(data, Dict) and "choices" in data:
choices = data["choices"]
if (
isinstance(choices, List)
and len(choices) >= 1
and "delta" in choices[0]
):
return ChatCompletionChunk(**data)
return ChatComplete(**data)
if isinstance(data, Dict):
return Response(**data)
@ -69,7 +97,9 @@ class ResponseComparator(JSONSnapshotExtension):
prefill_token.id == other.id
and prefill_token.text == other.text
and (
math.isclose(prefill_token.logprob, other.logprob, rel_tol=self.rtol)
math.isclose(
prefill_token.logprob, other.logprob, rel_tol=self.rtol
)
if prefill_token.logprob is not None
else prefill_token.logprob == other.logprob
)
@ -131,6 +161,16 @@ class ResponseComparator(JSONSnapshotExtension):
)
)
def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
return (
response.choices[0].message.content == other.choices[0].message.content
)
def eq_chat_complete_chunk(
response: ChatCompletionChunk, other: ChatCompletionChunk
) -> bool:
return response.choices[0].delta.content == other.choices[0].delta.content
def eq_response(response: Response, other: Response) -> bool:
return response.generated_text == other.generated_text and eq_details(
response.details, other.details
@ -144,6 +184,19 @@ class ResponseComparator(JSONSnapshotExtension):
if not isinstance(snapshot_data, List):
snapshot_data = [snapshot_data]
if isinstance(serialized_data[0], ChatComplete):
return len(snapshot_data) == len(serialized_data) and all(
[eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
)
if isinstance(serialized_data[0], ChatCompletionChunk):
return len(snapshot_data) == len(serialized_data) and all(
[
eq_chat_complete_chunk(r, o)
for r, o in zip(serialized_data, snapshot_data)
]
)
return len(snapshot_data) == len(serialized_data) and all(
[eq_response(r, o) for r, o in zip(serialized_data, snapshot_data)]
)
@ -153,6 +206,7 @@ class GenerousResponseComparator(ResponseComparator):
# Needed for GPTQ with exllama which has serious numerical fluctuations.
rtol = 0.75
class LauncherHandle:
def __init__(self, port: int):
self.client = AsyncClient(f"http://localhost:{port}")
@ -198,6 +252,7 @@ class ProcessLauncherHandle(LauncherHandle):
def response_snapshot(snapshot):
return snapshot.use_extension(ResponseComparator)
@pytest.fixture
def generous_response_snapshot(snapshot):
return snapshot.use_extension(GenerousResponseComparator)
@ -219,7 +274,11 @@ def launcher(event_loop):
quantize: Optional[str] = None,
trust_remote_code: bool = False,
use_flash_attention: bool = True,
dtype: Optional[str] = None
disable_grammar_support: bool = False,
dtype: Optional[str] = None,
revision: Optional[str] = None,
max_input_length: Optional[int] = None,
max_total_tokens: Optional[int] = None,
):
port = random.randint(8000, 10_000)
master_port = random.randint(10_000, 20_000)
@ -242,6 +301,8 @@ def launcher(event_loop):
env = os.environ
if disable_grammar_support:
args.append("--disable-grammar-support")
if num_shard is not None:
args.extend(["--num-shard", str(num_shard)])
if quantize is not None:
@ -250,8 +311,17 @@ def launcher(event_loop):
if dtype is not None:
args.append("--dtype")
args.append(dtype)
if revision is not None:
args.append("--revision")
args.append(revision)
if trust_remote_code:
args.append("--trust-remote-code")
if max_input_length:
args.append("--max-input-length")
args.append(str(max_input_length))
if max_total_tokens:
args.append("--max-total-tokens")
args.append(str(max_total_tokens))
env["LOG_LEVEL"] = "info,text_generation_router=debug"
@ -282,12 +352,18 @@ def launcher(event_loop):
quantize: Optional[str] = None,
trust_remote_code: bool = False,
use_flash_attention: bool = True,
dtype: Optional[str] = None
disable_grammar_support: bool = False,
dtype: Optional[str] = None,
revision: Optional[str] = None,
max_input_length: Optional[int] = None,
max_total_tokens: Optional[int] = None,
):
port = random.randint(8000, 10_000)
args = ["--model-id", model_id, "--env"]
if disable_grammar_support:
args.append("--disable-grammar-support")
if num_shard is not None:
args.extend(["--num-shard", str(num_shard)])
if quantize is not None:
@ -296,8 +372,17 @@ def launcher(event_loop):
if dtype is not None:
args.append("--dtype")
args.append(dtype)
if revision is not None:
args.append("--revision")
args.append(revision)
if trust_remote_code:
args.append("--trust-remote-code")
if max_input_length:
args.append("--max-input-length")
args.append(str(max_input_length))
if max_total_tokens:
args.append("--max-total-tokens")
args.append(str(max_total_tokens))
client = docker.from_env()
@ -312,7 +397,9 @@ def launcher(event_loop):
gpu_count = num_shard if num_shard is not None else 1
env = {"LOG_LEVEL": "info,text_generation_router=debug"}
env = {
"LOG_LEVEL": "info,text_generation_router=debug",
}
if not use_flash_attention:
env["USE_FLASH_ATTENTION"] = "false"
@ -335,7 +422,7 @@ def launcher(event_loop):
],
volumes=volumes,
ports={"80/tcp": port},
shm_size="1G"
shm_size="1G",
)
yield ContainerLauncherHandle(client, container.name, port)
@ -362,11 +449,22 @@ def launcher(event_loop):
@pytest.fixture(scope="module")
def generate_load():
async def generate_load_inner(
client: AsyncClient, prompt: str, max_new_tokens: int, n: int
client: AsyncClient,
prompt: str,
max_new_tokens: int,
n: int,
seed: Optional[int] = None,
grammar: Optional[Grammar] = None,
stop_sequences: Optional[List[str]] = None,
) -> List[Response]:
futures = [
client.generate(
prompt, max_new_tokens=max_new_tokens, decoder_input_details=True
prompt,
max_new_tokens=max_new_tokens,
decoder_input_details=True,
seed=seed,
grammar=grammar,
stop_sequences=stop_sequences,
)
for _ in range(n)
]

Binary file not shown.


View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 1736,
"logprob": -2.09375,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.8671875,
"special": false,
"text": "\n\n"
},
{
"id": 651,
"logprob": -2.4375,
"special": false,
"text": "The"
},
{
"id": 2121,
"logprob": -1.8203125,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23242188,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.08544922,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.9375,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.671875,
"special": false,
"text": " used"
},
{
"id": 577,
"logprob": -0.40429688,
"special": false,
"text": " to"
},
{
"id": 3853,
"logprob": -1.1875,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
}

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 7539,
"logprob": -0.73046875,
"special": false,
"text": " forms"
},
{
"id": 708,
"logprob": 0.0,
"special": false,
"text": " are"
},
{
"id": 671,
"logprob": -1.703125,
"special": false,
"text": " an"
},
{
"id": 8727,
"logprob": 0.0,
"special": false,
"text": " essential"
},
{
"id": 1702,
"logprob": 0.0,
"special": false,
"text": " part"
},
{
"id": 576,
"logprob": 0.0,
"special": false,
"text": " of"
},
{
"id": 573,
"logprob": 0.0,
"special": false,
"text": " the"
},
{
"id": 11859,
"logprob": -1.6953125,
"special": false,
"text": " lab"
},
{
"id": 2185,
"logprob": -1.3125,
"special": false,
"text": " process"
},
{
"id": 578,
"logprob": -1.5,
"special": false,
"text": " and"
}
],
"top_tokens": null
},
"generated_text": "Test request forms are an essential part of the lab process and"
}

View File

@ -0,0 +1,358 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 1736,
"logprob": -2.09375,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.9140625,
"special": false,
"text": "\n\n"
},
{
"id": 651,
"logprob": -2.453125,
"special": false,
"text": "The"
},
{
"id": 2121,
"logprob": -1.8984375,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23535156,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.091308594,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.96875,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.6484375,
"special": false,
"text": " used"
},
{
"id": 577,
"logprob": -0.43164062,
"special": false,
"text": " to"
},
{
"id": 3853,
"logprob": -1.2421875,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 1736,
"logprob": -2.09375,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.9140625,
"special": false,
"text": "\n\n"
},
{
"id": 651,
"logprob": -2.453125,
"special": false,
"text": "The"
},
{
"id": 2121,
"logprob": -1.8984375,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23535156,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.091308594,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.96875,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.6484375,
"special": false,
"text": " used"
},
{
"id": 577,
"logprob": -0.43164062,
"special": false,
"text": " to"
},
{
"id": 3853,
"logprob": -1.2421875,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 1736,
"logprob": -2.09375,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.9140625,
"special": false,
"text": "\n\n"
},
{
"id": 651,
"logprob": -2.453125,
"special": false,
"text": "The"
},
{
"id": 2121,
"logprob": -1.8984375,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23535156,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.091308594,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.96875,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.6484375,
"special": false,
"text": " used"
},
{
"id": 577,
"logprob": -0.43164062,
"special": false,
"text": " to"
},
{
"id": 3853,
"logprob": -1.2421875,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2,
"logprob": null,
"text": "<bos>"
},
{
"id": 2015,
"logprob": -10.0,
"text": "Test"
},
{
"id": 3853,
"logprob": -10.875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 1736,
"logprob": -2.09375,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.9140625,
"special": false,
"text": "\n\n"
},
{
"id": 651,
"logprob": -2.453125,
"special": false,
"text": "The"
},
{
"id": 2121,
"logprob": -1.8984375,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23535156,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.091308594,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.96875,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.6484375,
"special": false,
"text": " used"
},
{
"id": 577,
"logprob": -0.43164062,
"special": false,
"text": " to"
},
{
"id": 3853,
"logprob": -1.2421875,
"special": false,
"text": " request"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
}
]


@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -13.90625,
"text": "Test"
},
{
"id": 2009,
"logprob": -12.328125,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.0566406,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -1.5253906,
"special": false,
"text": "\n"
},
{
"id": 29902,
"logprob": -2.7578125,
"special": false,
"text": "I"
},
{
"id": 4966,
"logprob": -1.9033203,
"special": false,
"text": " hope"
},
{
"id": 445,
"logprob": -0.5019531,
"special": false,
"text": " this"
},
{
"id": 6911,
"logprob": -0.21264648,
"special": false,
"text": " helps"
},
{
"id": 29991,
"logprob": -0.5991211,
"special": false,
"text": "!"
},
{
"id": 2803,
"logprob": -0.37475586,
"special": false,
"text": " Let"
},
{
"id": 592,
"logprob": -0.018463135,
"special": false,
"text": " me"
},
{
"id": 1073,
"logprob": -0.0008597374,
"special": false,
"text": " know"
}
],
"top_tokens": null
},
"generated_text": "\n\nI hope this helps! Let me know"
}


@ -0,0 +1,274 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 30,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 5235,
"logprob": -10.0625,
"text": "info"
},
{
"id": 29901,
"logprob": -3.2324219,
"text": ":"
},
{
"id": 13260,
"logprob": -10.625,
"text": "dav"
},
{
"id": 333,
"logprob": -0.08276367,
"text": "id"
},
{
"id": 8753,
"logprob": -7.5273438,
"text": "hol"
},
{
"id": 17559,
"logprob": -3.8476562,
"text": "tz"
},
{
"id": 763,
"logprob": -10.140625,
"text": "like"
},
{
"id": 10697,
"logprob": -10.1953125,
"text": "trees"
},
{
"id": 322,
"logprob": -2.5742188,
"text": "and"
},
{
"id": 756,
"logprob": -7.4882812,
"text": "has"
},
{
"id": 1023,
"logprob": -5.0507812,
"text": "two"
},
{
"id": 274,
"logprob": -5.3164062,
"text": "c"
},
{
"id": 1446,
"logprob": -0.6694336,
"text": "ats"
},
{
"id": 29889,
"logprob": -0.9995117,
"text": "."
},
{
"id": 29871,
"logprob": -4.2421875,
"text": ""
}
],
"seed": null,
"tokens": [
{
"id": 6377,
"logprob": -0.14916992,
"special": false,
"text": "{\""
},
{
"id": 29888,
"logprob": -0.13598633,
"special": false,
"text": "f"
},
{
"id": 12935,
"logprob": -0.017669678,
"special": false,
"text": "irs"
},
{
"id": 29873,
"logprob": -0.00085639954,
"special": false,
"text": "t"
},
{
"id": 1170,
"logprob": -0.0054016113,
"special": false,
"text": "Name"
},
{
"id": 4710,
"logprob": -0.13549805,
"special": false,
"text": "\":\""
},
{
"id": 19504,
"logprob": -0.8852539,
"special": false,
"text": "David"
},
{
"id": 3284,
"logprob": -0.16394043,
"special": false,
"text": "\",\""
},
{
"id": 29882,
"logprob": -0.08862305,
"special": false,
"text": "h"
},
{
"id": 711,
"logprob": -0.66259766,
"special": false,
"text": "ob"
},
{
"id": 1609,
"logprob": -5.51939e-05,
"special": false,
"text": "by"
},
{
"id": 4710,
"logprob": -0.23120117,
"special": false,
"text": "\":\""
},
{
"id": 29911,
"logprob": -2.3730469,
"special": false,
"text": "T"
},
{
"id": 11003,
"logprob": -0.032104492,
"special": false,
"text": "rees"
},
{
"id": 3284,
"logprob": -0.22021484,
"special": false,
"text": "\",\""
},
{
"id": 4230,
"logprob": -0.06726074,
"special": false,
"text": "last"
},
{
"id": 1170,
"logprob": -0.003501892,
"special": false,
"text": "Name"
},
{
"id": 4710,
"logprob": -0.0045661926,
"special": false,
"text": "\":\""
},
{
"id": 29950,
"logprob": -0.12512207,
"special": false,
"text": "H"
},
{
"id": 14339,
"logprob": -0.009552002,
"special": false,
"text": "olt"
},
{
"id": 29920,
"logprob": -0.00042438507,
"special": false,
"text": "z"
},
{
"id": 3284,
"logprob": -0.11651611,
"special": false,
"text": "\",\""
},
{
"id": 29876,
"logprob": -0.29736328,
"special": false,
"text": "n"
},
{
"id": 398,
"logprob": -0.003030777,
"special": false,
"text": "um"
},
{
"id": 29907,
"logprob": -0.3774414,
"special": false,
"text": "C"
},
{
"id": 1446,
"logprob": -0.0003130436,
"special": false,
"text": "ats"
},
{
"id": 1115,
"logprob": -0.0021514893,
"special": false,
"text": "\":"
},
{
"id": 29906,
"logprob": -0.071899414,
"special": false,
"text": "2"
},
{
"id": 29913,
"logprob": -0.018997192,
"special": false,
"text": "}"
},
{
"id": 2,
"logprob": 0.0,
"special": true,
"text": "</s>"
}
],
"top_tokens": null
},
"generated_text": "{\"firstName\":\"David\",\"hobby\":\"Trees\",\"lastName\":\"Holtz\",\"numCats\":2}"
}


@ -0,0 +1,478 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1024,
"logprob": -10.578125,
"text": "name"
},
{
"id": 29901,
"logprob": -3.0332031,
"text": ":"
},
{
"id": 13260,
"logprob": -9.171875,
"text": "dav"
},
{
"id": 333,
"logprob": -0.04257202,
"text": "id"
},
{
"id": 29889,
"logprob": -2.4785156,
"text": "."
},
{
"id": 4876,
"logprob": -10.7890625,
"text": "email"
},
{
"id": 29901,
"logprob": -0.32495117,
"text": ":"
},
{
"id": 259,
"logprob": -9.4921875,
"text": " "
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -0.7709961,
"special": false,
"text": "1"
},
{
"id": 29906,
"logprob": -0.33740234,
"special": false,
"text": "2"
},
{
"id": 29941,
"logprob": -0.00995636,
"special": false,
"text": "3"
},
{
"id": 29946,
"logprob": -0.64208984,
"special": false,
"text": "4"
},
{
"id": 29945,
"logprob": -0.4970703,
"special": false,
"text": "5"
},
{
"id": 29953,
"logprob": -0.46533203,
"special": false,
"text": "6"
},
{
"id": 29992,
"logprob": -0.5336914,
"special": false,
"text": "@"
},
{
"id": 21980,
"logprob": -0.5361328,
"special": false,
"text": "gmail"
},
{
"id": 29889,
"logprob": -0.00088739395,
"special": false,
"text": "."
},
{
"id": 510,
"logprob": -0.0022735596,
"special": false,
"text": "com"
}
],
"top_tokens": null
},
"generated_text": "123456@gmail.com"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1024,
"logprob": -10.578125,
"text": "name"
},
{
"id": 29901,
"logprob": -3.03125,
"text": ":"
},
{
"id": 13260,
"logprob": -9.171875,
"text": "dav"
},
{
"id": 333,
"logprob": -0.04244995,
"text": "id"
},
{
"id": 29889,
"logprob": -2.4863281,
"text": "."
},
{
"id": 4876,
"logprob": -10.7890625,
"text": "email"
},
{
"id": 29901,
"logprob": -0.32714844,
"text": ":"
},
{
"id": 259,
"logprob": -9.4921875,
"text": " "
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -0.7685547,
"special": false,
"text": "1"
},
{
"id": 29906,
"logprob": -0.33666992,
"special": false,
"text": "2"
},
{
"id": 29941,
"logprob": -0.01008606,
"special": false,
"text": "3"
},
{
"id": 29946,
"logprob": -0.64160156,
"special": false,
"text": "4"
},
{
"id": 29945,
"logprob": -0.5,
"special": false,
"text": "5"
},
{
"id": 29953,
"logprob": -0.46557617,
"special": false,
"text": "6"
},
{
"id": 29992,
"logprob": -0.5341797,
"special": false,
"text": "@"
},
{
"id": 21980,
"logprob": -0.5361328,
"special": false,
"text": "gmail"
},
{
"id": 29889,
"logprob": -0.00088739395,
"special": false,
"text": "."
},
{
"id": 510,
"logprob": -0.0022907257,
"special": false,
"text": "com"
}
],
"top_tokens": null
},
"generated_text": "123456@gmail.com"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1024,
"logprob": -10.578125,
"text": "name"
},
{
"id": 29901,
"logprob": -3.0332031,
"text": ":"
},
{
"id": 13260,
"logprob": -9.171875,
"text": "dav"
},
{
"id": 333,
"logprob": -0.04257202,
"text": "id"
},
{
"id": 29889,
"logprob": -2.4785156,
"text": "."
},
{
"id": 4876,
"logprob": -10.7890625,
"text": "email"
},
{
"id": 29901,
"logprob": -0.32495117,
"text": ":"
},
{
"id": 259,
"logprob": -9.4921875,
"text": " "
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -0.7709961,
"special": false,
"text": "1"
},
{
"id": 29906,
"logprob": -0.33740234,
"special": false,
"text": "2"
},
{
"id": 29941,
"logprob": -0.00995636,
"special": false,
"text": "3"
},
{
"id": 29946,
"logprob": -0.64208984,
"special": false,
"text": "4"
},
{
"id": 29945,
"logprob": -0.4970703,
"special": false,
"text": "5"
},
{
"id": 29953,
"logprob": -0.46533203,
"special": false,
"text": "6"
},
{
"id": 29992,
"logprob": -0.5336914,
"special": false,
"text": "@"
},
{
"id": 21980,
"logprob": -0.5361328,
"special": false,
"text": "gmail"
},
{
"id": 29889,
"logprob": -0.00088739395,
"special": false,
"text": "."
},
{
"id": 510,
"logprob": -0.0022735596,
"special": false,
"text": "com"
}
],
"top_tokens": null
},
"generated_text": "123456@gmail.com"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1024,
"logprob": -10.578125,
"text": "name"
},
{
"id": 29901,
"logprob": -3.0332031,
"text": ":"
},
{
"id": 13260,
"logprob": -9.171875,
"text": "dav"
},
{
"id": 333,
"logprob": -0.04257202,
"text": "id"
},
{
"id": 29889,
"logprob": -2.4785156,
"text": "."
},
{
"id": 4876,
"logprob": -10.7890625,
"text": "email"
},
{
"id": 29901,
"logprob": -0.32495117,
"text": ":"
},
{
"id": 259,
"logprob": -9.4921875,
"text": " "
}
],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -0.7709961,
"special": false,
"text": "1"
},
{
"id": 29906,
"logprob": -0.33740234,
"special": false,
"text": "2"
},
{
"id": 29941,
"logprob": -0.00995636,
"special": false,
"text": "3"
},
{
"id": 29946,
"logprob": -0.64208984,
"special": false,
"text": "4"
},
{
"id": 29945,
"logprob": -0.4970703,
"special": false,
"text": "5"
},
{
"id": 29953,
"logprob": -0.46533203,
"special": false,
"text": "6"
},
{
"id": 29992,
"logprob": -0.5336914,
"special": false,
"text": "@"
},
{
"id": 21980,
"logprob": -0.5361328,
"special": false,
"text": "gmail"
},
{
"id": 29889,
"logprob": -0.00088739395,
"special": false,
"text": "."
},
{
"id": 510,
"logprob": -0.0022735596,
"special": false,
"text": "com"
}
],
"top_tokens": null
},
"generated_text": "123456@gmail.com"
}
]


@ -0,0 +1,109 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 806,
"logprob": -11.890625,
"text": "Wh"
},
{
"id": 1446,
"logprob": -3.6699219,
"text": "ats"
},
{
"id": 2921,
"logprob": -7.8203125,
"text": "Go"
},
{
"id": 468,
"logprob": -8.0703125,
"text": "og"
},
{
"id": 793,
"logprob": -2.1875,
"text": "les"
},
{
"id": 16332,
"logprob": -9.7109375,
"text": "DNS"
}
],
"seed": null,
"tokens": [
{
"id": 29946,
"logprob": -1.4765625,
"special": false,
"text": "4"
},
{
"id": 29906,
"logprob": -0.9199219,
"special": false,
"text": "2"
},
{
"id": 29889,
"logprob": 0.0,
"special": false,
"text": "."
},
{
"id": 29896,
"logprob": -1.1367188,
"special": false,
"text": "1"
},
{
"id": 29889,
"logprob": -1.4648438,
"special": false,
"text": "."
},
{
"id": 29896,
"logprob": -0.40722656,
"special": false,
"text": "1"
},
{
"id": 29889,
"logprob": -0.17419434,
"special": false,
"text": "."
},
{
"id": 29896,
"logprob": -0.20251465,
"special": false,
"text": "1"
},
{
"id": 29900,
"logprob": -1.5527344,
"special": false,
"text": "0"
},
{
"id": 29896,
"logprob": -1.3710938,
"special": false,
"text": "1"
}
],
"top_tokens": null
},
"generated_text": "42.1.1.101"
}


@ -0,0 +1,73 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 29896,
"logprob": -0.7685547,
"special": false,
"text": "1"
},
{
"id": 29906,
"logprob": -0.33666992,
"special": false,
"text": "2"
},
{
"id": 29941,
"logprob": -0.009979248,
"special": false,
"text": "3"
},
{
"id": 29946,
"logprob": -0.64208984,
"special": false,
"text": "4"
},
{
"id": 29945,
"logprob": -0.4970703,
"special": false,
"text": "5"
},
{
"id": 29953,
"logprob": -0.46533203,
"special": false,
"text": "6"
},
{
"id": 29992,
"logprob": -0.5336914,
"special": false,
"text": "@"
},
{
"id": 21980,
"logprob": -0.53759766,
"special": false,
"text": "gmail"
},
{
"id": 29889,
"logprob": -0.0008878708,
"special": false,
"text": "."
},
{
"id": 510,
"logprob": -0.002275467,
"special": false,
"text": "com"
}
],
"top_tokens": null
},
"generated_text": "123456@gmail.com"
}


@ -11,78 +11,79 @@
},
{
"id": 4321,
"logprob": -9.59375,
"logprob": -9.7890625,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6640625,
"logprob": -9.625,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 29918,
"logprob": -2.3867188,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -2.8183594,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -1.6367188,
"logprob": -2.3359375,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.0527344,
"logprob": -1.8779297,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.6542969,
"logprob": -1.2744141,
"special": false,
"text": " request"
},
{
"id": 29918,
"logprob": -0.056121826,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -0.01600647,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -0.87939453,
"logprob": -1.6933594,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.7529297,
"logprob": -1.4648438,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.2980957,
"logprob": -0.15600586,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.8027344,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.23022461,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0069885254,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.02218628,
"special": false,
"text": "\n"
}
]
],
"top_tokens": null
},
"generated_text": "_uri\nTest request_uri\nTest request"
"generated_text": "\nTest request\nTest request\nTest request\n"
}


@ -11,12 +11,12 @@
},
{
"id": 4321,
"logprob": -9.6015625,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6640625,
"logprob": -9.6015625,
"text": "request"
}
],
@ -24,13 +24,13 @@
"tokens": [
{
"id": 29899,
"logprob": -1.1640625,
"logprob": -1.5625,
"special": false,
"text": "-"
},
{
"id": 1454,
"logprob": -0.07543945,
"logprob": -0.20410156,
"special": false,
"text": "for"
},
@ -54,19 +54,19 @@
},
{
"id": 396,
"logprob": -0.2956543,
"logprob": -0.27685547,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.52734375,
"logprob": -0.4970703,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.6899414,
"logprob": -0.80615234,
"special": false,
"text": "0"
},
@ -77,12 +77,13 @@
"text": "1"
},
{
"id": 29946,
"logprob": -1.5068359,
"id": 29955,
"logprob": -1.0751953,
"special": false,
"text": "4"
"text": "7"
}
]
],
"top_tokens": null
},
"generated_text": "Test request-for-comment: #2014"
"generated_text": "Test request-for-comment: #2017"
}


@ -12,80 +12,81 @@
},
{
"id": 4321,
"logprob": -9.6015625,
"logprob": -9.828125,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.671875,
"logprob": -9.609375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 29918,
"logprob": -2.3828125,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -2.8105469,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -1.6396484,
"logprob": -2.3300781,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.0546875,
"logprob": -1.8740234,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.6513672,
"logprob": -1.2646484,
"special": false,
"text": " request"
},
{
"id": 29918,
"logprob": -0.056365967,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -0.016082764,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -0.87841797,
"logprob": -1.7158203,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.7548828,
"logprob": -1.4667969,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.29711914,
"logprob": -0.15344238,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.81591797,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22973633,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021957397,
"special": false,
"text": "\n"
}
]
],
"top_tokens": null
},
"generated_text": "_uri\nTest request_uri\nTest request"
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
@ -100,80 +101,81 @@
},
{
"id": 4321,
"logprob": -9.6015625,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6640625,
"logprob": -9.59375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 29918,
"logprob": -2.3828125,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -2.828125,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -1.6386719,
"logprob": -2.3378906,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.0527344,
"logprob": -1.8779297,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.6542969,
"logprob": -1.2636719,
"special": false,
"text": " request"
},
{
"id": 29918,
"logprob": -0.055877686,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -0.016021729,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -0.8769531,
"logprob": -1.6992188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.7583008,
"logprob": -1.4589844,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.29833984,
"logprob": -0.15344238,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.79052734,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22937012,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007041931,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022140503,
"special": false,
"text": "\n"
}
]
],
"top_tokens": null
},
"generated_text": "_uri\nTest request_uri\nTest request"
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
@ -188,80 +190,81 @@
},
{
"id": 4321,
"logprob": -9.6015625,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.671875,
"logprob": -9.609375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 29918,
"logprob": -2.3847656,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -2.8144531,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -1.6396484,
"logprob": -2.3261719,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.0527344,
"logprob": -1.8730469,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.65478516,
"logprob": -1.2587891,
"special": false,
"text": " request"
},
{
"id": 29918,
"logprob": -0.056243896,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -0.016143799,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -0.8808594,
"logprob": -1.6894531,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.75341797,
"logprob": -1.46875,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.2956543,
"logprob": -0.1541748,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.80322266,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22912598,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070495605,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021606445,
"special": false,
"text": "\n"
}
]
],
"top_tokens": null
},
"generated_text": "_uri\nTest request_uri\nTest request"
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
@ -276,79 +279,80 @@
},
{
"id": 4321,
"logprob": -9.6015625,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6640625,
"logprob": -9.6015625,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 29918,
"logprob": -2.3769531,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -2.8183594,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -1.6396484,
"logprob": -2.3320312,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.0546875,
"logprob": -1.875,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.65478516,
"logprob": -1.2646484,
"special": false,
"text": " request"
},
{
"id": 29918,
"logprob": -0.05557251,
"special": false,
"text": "_"
},
{
"id": 5338,
"logprob": -0.01612854,
"special": false,
"text": "uri"
},
{
"id": 13,
"logprob": -0.8730469,
"logprob": -1.6884766,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.7519531,
"logprob": -1.4589844,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.29785156,
"logprob": -0.15185547,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.79833984,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22827148,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006996155,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021560669,
"special": false,
"text": "\n"
}
]
],
"top_tokens": null
},
"generated_text": "_uri\nTest request_uri\nTest request"
"generated_text": "\nTest request\nTest request\nTest request\n"
}
]


@ -0,0 +1,98 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 338,
"logprob": -10.0078125,
"text": "is"
},
{
"id": 21784,
"logprob": -15.515625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -2.8847656,
"text": "Learning"
},
{
"id": 29973,
"logprob": -4.140625,
"text": "?"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": -1.1582031,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.23083496,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": 0.0,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": 0.0,
"special": false,
"text": " learning"
},
{
"id": 29892,
"logprob": -0.61816406,
"special": false,
"text": ","
},
{
"id": 607,
"logprob": -0.7089844,
"special": false,
"text": " which"
},
{
"id": 508,
"logprob": -1.7724609,
"special": false,
"text": " can"
},
{
"id": 367,
"logprob": 0.0,
"special": false,
"text": " be"
},
{
"id": 5545,
"logprob": 0.0,
"special": false,
"text": " considered"
},
{
"id": 408,
"logprob": -0.3869629,
"special": false,
"text": " as"
}
]
},
"generated_text": "What is Deep Learning?\nDeep learning, which can be considered as"
}


@ -0,0 +1,414 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -10.734375,
"text": "What"
},
{
"id": 338,
"logprob": -1.5488281,
"text": "is"
},
{
"id": 21784,
"logprob": -9.2890625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.2753906,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.48046875,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.1845703,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.5727539,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.00010967255,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.1239624,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.04510498,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.018295288,
"special": false,
"text": " a"
},
{
"id": 11306,
"logprob": -0.45922852,
"special": false,
"text": " subset"
},
{
"id": 310,
"logprob": -0.00020992756,
"special": false,
"text": " of"
},
{
"id": 4933,
"logprob": -0.0046539307,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.00025844574,
"special": false,
"text": " learning"
}
]
},
"generated_text": "\nDeep learning is a subset of machine learning"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -10.734375,
"text": "What"
},
{
"id": 338,
"logprob": -1.5488281,
"text": "is"
},
{
"id": 21784,
"logprob": -9.2890625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.2724609,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.47729492,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.1826172,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.56689453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.000108003616,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.1239624,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.044433594,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.018295288,
"special": false,
"text": " a"
},
{
"id": 11306,
"logprob": -0.45922852,
"special": false,
"text": " subset"
},
{
"id": 310,
"logprob": -0.0002104044,
"special": false,
"text": " of"
},
{
"id": 4933,
"logprob": -0.004711151,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.00025892258,
"special": false,
"text": " learning"
}
]
},
"generated_text": "\nDeep learning is a subset of machine learning"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -10.734375,
"text": "What"
},
{
"id": 338,
"logprob": -1.5488281,
"text": "is"
},
{
"id": 21784,
"logprob": -9.2890625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.2724609,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.47729492,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.1826172,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.56689453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.000108003616,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.1239624,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.044433594,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.018295288,
"special": false,
"text": " a"
},
{
"id": 11306,
"logprob": -0.45922852,
"special": false,
"text": " subset"
},
{
"id": 310,
"logprob": -0.0002104044,
"special": false,
"text": " of"
},
{
"id": 4933,
"logprob": -0.004711151,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.00025892258,
"special": false,
"text": " learning"
}
]
},
"generated_text": "\nDeep learning is a subset of machine learning"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -10.734375,
"text": "What"
},
{
"id": 338,
"logprob": -1.5488281,
"text": "is"
},
{
"id": 21784,
"logprob": -9.2890625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.2724609,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.47729492,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.1826172,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.56689453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.000108003616,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.1239624,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.044433594,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.018295288,
"special": false,
"text": " a"
},
{
"id": 11306,
"logprob": -0.45922852,
"special": false,
"text": " subset"
},
{
"id": 310,
"logprob": -0.0002104044,
"special": false,
"text": " of"
},
{
"id": 4933,
"logprob": -0.004711151,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.00025892258,
"special": false,
"text": " learning"
}
]
},
"generated_text": "\nDeep learning is a subset of machine learning"
}
]


@ -0,0 +1,103 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -10.734375,
"text": "What"
},
{
"id": 338,
"logprob": -1.5488281,
"text": "is"
},
{
"id": 21784,
"logprob": -9.2890625,
"text": "Deep"
},
{
"id": 29257,
"logprob": -1.2753906,
"text": "Learning"
},
{
"id": 29973,
"logprob": -0.48046875,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -1.1845703,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -0.5727539,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.000108122826,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.1239624,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.044433594,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.01852417,
"special": false,
"text": " a"
},
{
"id": 11306,
"logprob": -0.45922852,
"special": false,
"text": " subset"
},
{
"id": 310,
"logprob": -0.0002104044,
"special": false,
"text": " of"
},
{
"id": 4933,
"logprob": -0.004787445,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.00026226044,
"special": false,
"text": " learning"
}
]
},
"generated_text": "\nDeep learning is a subset of machine learning"
}


@ -0,0 +1,84 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 25,
"logprob": -2.3203125,
"special": false,
"text": ":"
},
{
"id": 1391,
"logprob": -0.98779297,
"special": false,
"text": " {"
},
{
"id": 25927,
"logprob": -0.76660156,
"special": false,
"text": "request"
},
{
"id": 92,
"logprob": -0.7246094,
"special": false,
"text": "}"
},
{
"id": 4943,
"logprob": -0.41333008,
"special": false,
"text": "\")"
},
{
"id": 198,
"logprob": -0.11785889,
"special": false,
"text": "\n"
},
{
"id": 50280,
"logprob": -0.97265625,
"special": false,
"text": " "
},
{
"id": 26209,
"logprob": -1.4414062,
"special": false,
"text": "response"
},
{
"id": 796,
"logprob": -0.0569458,
"special": false,
"text": " ="
},
{
"id": 2116,
"logprob": -1.1533203,
"special": false,
"text": " self"
}
],
"top_tokens": null
},
"generated_text": ": {request}\")\n response = self"
}


@ -0,0 +1,60 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "stop_sequence",
"generated_tokens": 6,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 284,
"logprob": -0.19421387,
"special": false,
"text": " to"
},
{
"id": 3758,
"logprob": -0.62597656,
"special": false,
"text": " send"
},
{
"id": 1366,
"logprob": -0.87060547,
"special": false,
"text": " data"
},
{
"id": 625,
"logprob": -0.88427734,
"special": false,
"text": " over"
},
{
"id": 257,
"logprob": -1.0830078,
"special": false,
"text": " a"
},
{
"id": 3127,
"logprob": -1.9462891,
"special": false,
"text": " network"
}
],
"top_tokens": null
},
"generated_text": "Test request to send data over a network"
}


@ -0,0 +1,338 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 25,
"logprob": -2.3203125,
"special": false,
"text": ":"
},
{
"id": 1391,
"logprob": -0.98779297,
"special": false,
"text": " {"
},
{
"id": 25927,
"logprob": -0.7729492,
"special": false,
"text": "request"
},
{
"id": 92,
"logprob": -0.7241211,
"special": false,
"text": "}"
},
{
"id": 4943,
"logprob": -0.4091797,
"special": false,
"text": "\")"
},
{
"id": 198,
"logprob": -0.119018555,
"special": false,
"text": "\n"
},
{
"id": 50280,
"logprob": -0.9707031,
"special": false,
"text": " "
},
{
"id": 26209,
"logprob": -1.4414062,
"special": false,
"text": "response"
},
{
"id": 796,
"logprob": -0.056854248,
"special": false,
"text": " ="
},
{
"id": 2116,
"logprob": -1.1533203,
"special": false,
"text": " self"
}
],
"top_tokens": null
},
"generated_text": ": {request}\")\n response = self"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 25,
"logprob": -2.3203125,
"special": false,
"text": ":"
},
{
"id": 1391,
"logprob": -0.98779297,
"special": false,
"text": " {"
},
{
"id": 25927,
"logprob": -0.7729492,
"special": false,
"text": "request"
},
{
"id": 92,
"logprob": -0.7241211,
"special": false,
"text": "}"
},
{
"id": 4943,
"logprob": -0.4091797,
"special": false,
"text": "\")"
},
{
"id": 198,
"logprob": -0.119018555,
"special": false,
"text": "\n"
},
{
"id": 50280,
"logprob": -0.9707031,
"special": false,
"text": " "
},
{
"id": 26209,
"logprob": -1.4414062,
"special": false,
"text": "response"
},
{
"id": 796,
"logprob": -0.056854248,
"special": false,
"text": " ="
},
{
"id": 2116,
"logprob": -1.1533203,
"special": false,
"text": " self"
}
],
"top_tokens": null
},
"generated_text": ": {request}\")\n response = self"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 25,
"logprob": -2.3203125,
"special": false,
"text": ":"
},
{
"id": 1391,
"logprob": -0.98779297,
"special": false,
"text": " {"
},
{
"id": 25927,
"logprob": -0.7729492,
"special": false,
"text": "request"
},
{
"id": 92,
"logprob": -0.7241211,
"special": false,
"text": "}"
},
{
"id": 4943,
"logprob": -0.4091797,
"special": false,
"text": "\")"
},
{
"id": 198,
"logprob": -0.119018555,
"special": false,
"text": "\n"
},
{
"id": 50280,
"logprob": -0.9707031,
"special": false,
"text": " "
},
{
"id": 26209,
"logprob": -1.4414062,
"special": false,
"text": "response"
},
{
"id": 796,
"logprob": -0.056854248,
"special": false,
"text": " ="
},
{
"id": 2116,
"logprob": -1.1533203,
"special": false,
"text": " self"
}
],
"top_tokens": null
},
"generated_text": ": {request}\")\n response = self"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 14402,
"logprob": null,
"text": "Test"
},
{
"id": 2581,
"logprob": -11.6171875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 25,
"logprob": -2.3203125,
"special": false,
"text": ":"
},
{
"id": 1391,
"logprob": -0.98779297,
"special": false,
"text": " {"
},
{
"id": 25927,
"logprob": -0.7729492,
"special": false,
"text": "request"
},
{
"id": 92,
"logprob": -0.7241211,
"special": false,
"text": "}"
},
{
"id": 4943,
"logprob": -0.4091797,
"special": false,
"text": "\")"
},
{
"id": 198,
"logprob": -0.119018555,
"special": false,
"text": "\n"
},
{
"id": 50280,
"logprob": -0.9707031,
"special": false,
"text": " "
},
{
"id": 26209,
"logprob": -1.4414062,
"special": false,
"text": "response"
},
{
"id": 796,
"logprob": -0.056854248,
"special": false,
"text": " ="
},
{
"id": 2116,
"logprob": -1.1533203,
"special": false,
"text": " self"
}
],
"top_tokens": null
},
"generated_text": ": {request}\")\n response = self"
}
]


@ -0,0 +1,84 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 198,
"logprob": -2.9023438,
"special": false,
"text": "\n"
},
{
"id": 2,
"logprob": -2.9160156,
"special": false,
"text": "#"
},
{
"id": 4230,
"logprob": -3.1035156,
"special": false,
"text": " Create"
},
{
"id": 264,
"logprob": -1.1025391,
"special": false,
"text": " a"
},
{
"id": 1681,
"logprob": -1.6914062,
"special": false,
"text": " request"
},
{
"id": 198,
"logprob": -1.1953125,
"special": false,
"text": "\n"
},
{
"id": 2035,
"logprob": -1.3203125,
"special": false,
"text": "request"
},
{
"id": 284,
"logprob": -0.13537598,
"special": false,
"text": " ="
},
{
"id": 7388,
"logprob": -1.2402344,
"special": false,
"text": " requests"
},
{
"id": 670,
"logprob": -0.2775879,
"special": false,
"text": ".get"
}
],
"top_tokens": null
},
"generated_text": "\n# Create a request\nrequest = requests.get"
}


@ -0,0 +1,84 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 311,
"logprob": -1.4277344,
"special": false,
"text": " to"
},
{
"id": 279,
"logprob": -0.65478516,
"special": false,
"text": " the"
},
{
"id": 2473,
"logprob": -1.8300781,
"special": false,
"text": " service"
},
{
"id": 382,
"logprob": -0.75,
"special": false,
"text": ".\n\n"
},
{
"id": 286,
"logprob": -0.11621094,
"special": false,
"text": " "
},
{
"id": 549,
"logprob": 0.0,
"special": false,
"text": " :"
},
{
"id": 689,
"logprob": -0.48608398,
"special": false,
"text": "return"
},
{
"id": 25,
"logprob": 0.0,
"special": false,
"text": ":"
},
{
"id": 5949,
"logprob": -0.5756836,
"special": false,
"text": " Response"
},
{
"id": 504,
"logprob": -0.24499512,
"special": false,
"text": " from"
}
],
"top_tokens": null
},
"generated_text": "Test request to the service.\n\n :return: Response from"
}


@ -0,0 +1,338 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 198,
"logprob": -2.9023438,
"special": false,
"text": "\n"
},
{
"id": 2,
"logprob": -2.9140625,
"special": false,
"text": "#"
},
{
"id": 4230,
"logprob": -3.1054688,
"special": false,
"text": " Create"
},
{
"id": 264,
"logprob": -1.0966797,
"special": false,
"text": " a"
},
{
"id": 1681,
"logprob": -1.6914062,
"special": false,
"text": " request"
},
{
"id": 198,
"logprob": -1.1923828,
"special": false,
"text": "\n"
},
{
"id": 2035,
"logprob": -1.3193359,
"special": false,
"text": "request"
},
{
"id": 284,
"logprob": -0.13586426,
"special": false,
"text": " ="
},
{
"id": 7388,
"logprob": -1.2412109,
"special": false,
"text": " requests"
},
{
"id": 670,
"logprob": -0.2775879,
"special": false,
"text": ".get"
}
],
"top_tokens": null
},
"generated_text": "\n# Create a request\nrequest = requests.get"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 198,
"logprob": -2.9023438,
"special": false,
"text": "\n"
},
{
"id": 2,
"logprob": -2.9140625,
"special": false,
"text": "#"
},
{
"id": 4230,
"logprob": -3.1054688,
"special": false,
"text": " Create"
},
{
"id": 264,
"logprob": -1.0966797,
"special": false,
"text": " a"
},
{
"id": 1681,
"logprob": -1.6914062,
"special": false,
"text": " request"
},
{
"id": 198,
"logprob": -1.1923828,
"special": false,
"text": "\n"
},
{
"id": 2035,
"logprob": -1.3193359,
"special": false,
"text": "request"
},
{
"id": 284,
"logprob": -0.13586426,
"special": false,
"text": " ="
},
{
"id": 7388,
"logprob": -1.2412109,
"special": false,
"text": " requests"
},
{
"id": 670,
"logprob": -0.2775879,
"special": false,
"text": ".get"
}
],
"top_tokens": null
},
"generated_text": "\n# Create a request\nrequest = requests.get"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 198,
"logprob": -2.9023438,
"special": false,
"text": "\n"
},
{
"id": 2,
"logprob": -2.9140625,
"special": false,
"text": "#"
},
{
"id": 4230,
"logprob": -3.1054688,
"special": false,
"text": " Create"
},
{
"id": 264,
"logprob": -1.0966797,
"special": false,
"text": " a"
},
{
"id": 1681,
"logprob": -1.6914062,
"special": false,
"text": " request"
},
{
"id": 198,
"logprob": -1.1923828,
"special": false,
"text": "\n"
},
{
"id": 2035,
"logprob": -1.3193359,
"special": false,
"text": "request"
},
{
"id": 284,
"logprob": -0.13586426,
"special": false,
"text": " ="
},
{
"id": 7388,
"logprob": -1.2412109,
"special": false,
"text": " requests"
},
{
"id": 670,
"logprob": -0.2775879,
"special": false,
"text": ".get"
}
],
"top_tokens": null
},
"generated_text": "\n# Create a request\nrequest = requests.get"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2271,
"logprob": null,
"text": "Test"
},
{
"id": 1681,
"logprob": -8.8515625,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 198,
"logprob": -2.9023438,
"special": false,
"text": "\n"
},
{
"id": 2,
"logprob": -2.9140625,
"special": false,
"text": "#"
},
{
"id": 4230,
"logprob": -3.1054688,
"special": false,
"text": " Create"
},
{
"id": 264,
"logprob": -1.0966797,
"special": false,
"text": " a"
},
{
"id": 1681,
"logprob": -1.6914062,
"special": false,
"text": " request"
},
{
"id": 198,
"logprob": -1.1923828,
"special": false,
"text": "\n"
},
{
"id": 2035,
"logprob": -1.3193359,
"special": false,
"text": "request"
},
{
"id": 284,
"logprob": -0.13586426,
"special": false,
"text": " ="
},
{
"id": 7388,
"logprob": -1.2412109,
"special": false,
"text": " requests"
},
{
"id": 670,
"logprob": -0.2775879,
"special": false,
"text": ".get"
}
],
"top_tokens": null
},
"generated_text": "\n# Create a request\nrequest = requests.get"
}
]


@ -0,0 +1,94 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": null,
"tokens": [
{
"id": 2284,
"logprob": -0.92626953,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": -0.40844727,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": -0.27905273,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": -0.6118164,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.68652344,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -1.4619141,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.7993164,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.63134766,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": -0.23278809,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -1.2294922,
"special": false,
"text": "def"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef"
}


@ -0,0 +1,394 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 60,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": 0,
"tokens": [
{
"id": 2284,
"logprob": -0.296875,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": 0.0,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": 0.0,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": 0.0,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.28125,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -0.79248047,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.61816406,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.0619812,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -0.4091797,
"special": false,
"text": "def"
},
{
"id": 1489,
"logprob": 0.0,
"special": false,
"text": " print"
},
{
"id": 100,
"logprob": 0.0,
"special": false,
"text": "_"
},
{
"id": 7670,
"logprob": 0.0,
"special": false,
"text": "hello"
},
{
"id": 100,
"logprob": 0.0,
"special": false,
"text": "_"
},
{
"id": 444,
"logprob": -0.21655273,
"special": false,
"text": "name"
},
{
"id": 45,
"logprob": 0.0,
"special": false,
"text": "("
},
{
"id": 444,
"logprob": 0.0,
"special": false,
"text": "name"
},
{
"id": 731,
"logprob": 0.0,
"special": false,
"text": "):"
},
{
"id": 303,
"logprob": 0.0,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": 0.0,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": 0.0,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": 0.0,
"special": false,
"text": "Hello"
},
{
"id": 332,
"logprob": -0.034698486,
"special": false,
"text": " \""
},
{
"id": 494,
"logprob": 0.0,
"special": false,
"text": " +"
},
{
"id": 655,
"logprob": 0.0,
"special": false,
"text": " name"
},
{
"id": 494,
"logprob": -0.20141602,
"special": false,
"text": " +"
},
{
"id": 332,
"logprob": 0.0,
"special": false,
"text": " \""
},
{
"id": 16013,
"logprob": 0.0,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": 0.0,
"special": false,
"text": "def"
},
{
"id": 1489,
"logprob": 0.0,
"special": false,
"text": " print"
},
{
"id": 100,
"logprob": 0.0,
"special": false,
"text": "_"
},
{
"id": 7670,
"logprob": 0.0,
"special": false,
"text": "hello"
},
{
"id": 100,
"logprob": 0.0,
"special": false,
"text": "_"
},
{
"id": 444,
"logprob": 0.0,
"special": false,
"text": "name"
},
{
"id": 100,
"logprob": 0.0,
"special": false,
"text": "_"
},
{
"id": 400,
"logprob": 0.0,
"special": false,
"text": "age"
},
{
"id": 45,
"logprob": 0.0,
"special": false,
"text": "("
},
{
"id": 444,
"logprob": 0.0,
"special": false,
"text": "name"
},
{
"id": 49,
"logprob": 0.0,
"special": false,
"text": ","
},
{
"id": 11505,
"logprob": 0.0,
"special": false,
"text": " age"
},
{
"id": 731,
"logprob": 0.0,
"special": false,
"text": "):"
},
{
"id": 303,
"logprob": 0.0,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": 0.0,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": 0.0,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": 0.0,
"special": false,
"text": "Hello"
},
{
"id": 332,
"logprob": 0.0,
"special": false,
"text": " \""
},
{
"id": 494,
"logprob": 0.0,
"special": false,
"text": " +"
},
{
"id": 655,
"logprob": 0.0,
"special": false,
"text": " name"
},
{
"id": 494,
"logprob": 0.0,
"special": false,
"text": " +"
},
{
"id": 3021,
"logprob": -0.5761719,
"special": false,
"text": " \","
},
{
"id": 863,
"logprob": 0.0,
"special": false,
"text": " you"
},
{
"id": 904,
"logprob": 0.0,
"special": false,
"text": " are"
},
{
"id": 332,
"logprob": 0.0,
"special": false,
"text": " \""
},
{
"id": 494,
"logprob": 0.0,
"special": false,
"text": " +"
},
{
"id": 615,
"logprob": 0.0,
"special": false,
"text": " str"
},
{
"id": 45,
"logprob": 0.0,
"special": false,
"text": "("
},
{
"id": 400,
"logprob": 0.0,
"special": false,
"text": "age"
},
{
"id": 46,
"logprob": 0.0,
"special": false,
"text": ")"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef print_hello_name(name):\n print(\"Hello \" + name + \"!\")\n\ndef print_hello_name_age(name, age):\n print(\"Hello \" + name + \", you are \" + str(age)"
}


@ -0,0 +1,378 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": null,
"tokens": [
{
"id": 2284,
"logprob": -0.92626953,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": -0.40722656,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": -0.27954102,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": -0.6142578,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.68310547,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -1.4570312,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.80126953,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.6303711,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": -0.23327637,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -1.2304688,
"special": false,
"text": "def"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": null,
"tokens": [
{
"id": 2284,
"logprob": -0.92626953,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": -0.40722656,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": -0.27954102,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": -0.6142578,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.68310547,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -1.4570312,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.80126953,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.6303711,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": -0.23327637,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -1.2304688,
"special": false,
"text": "def"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": null,
"tokens": [
{
"id": 2284,
"logprob": -0.92626953,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": -0.40722656,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": -0.27954102,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": -0.6142578,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.68310547,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -1.4570312,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.80126953,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.6303711,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": -0.23327637,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -1.2304688,
"special": false,
"text": "def"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 610,
"logprob": null,
"text": "def"
},
{
"id": 1489,
"logprob": -5.2617188,
"text": " print"
},
{
"id": 100,
"logprob": -0.38476562,
"text": "_"
},
{
"id": 7670,
"logprob": -7.640625,
"text": "hello"
}
],
"seed": null,
"tokens": [
{
"id": 2284,
"logprob": -0.92626953,
"special": false,
"text": "():"
},
{
"id": 303,
"logprob": -0.40722656,
"special": false,
"text": "\n "
},
{
"id": 1489,
"logprob": -0.27954102,
"special": false,
"text": " print"
},
{
"id": 459,
"logprob": -0.6142578,
"special": false,
"text": "(\""
},
{
"id": 8302,
"logprob": -0.68310547,
"special": false,
"text": "Hello"
},
{
"id": 10914,
"logprob": -1.4570312,
"special": false,
"text": " World"
},
{
"id": 16013,
"logprob": -0.80126953,
"special": false,
"text": "!\")"
},
{
"id": 222,
"logprob": -0.6303711,
"special": false,
"text": "\n"
},
{
"id": 222,
"logprob": -0.23327637,
"special": false,
"text": "\n"
},
{
"id": 610,
"logprob": -1.2304688,
"special": false,
"text": "def"
}
],
"top_tokens": null
},
"generated_text": "():\n print(\"Hello World!\")\n\ndef"
}
]


@ -1,193 +1,194 @@
{
"generated_text": "\n return sum(L) / len(L)\n\n\ndef geometric_mean(L",
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 20,
"seed": null,
"prefill": [
{
"id": 589,
"text": "def",
"logprob": null
"logprob": null,
"text": "def"
},
{
"id": 3226,
"text": " ge",
"logprob": -9.0234375
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"text": "ometric",
"logprob": -9.0859375
"logprob": -7.5859375,
"text": "ometric"
},
{
"id": 81,
"text": "_",
"logprob": -0.25878906
"logprob": -0.2668457,
"text": "_"
},
{
"id": 6009,
"text": "mean",
"logprob": -2.2109375
"logprob": -1.6416016,
"text": "mean"
},
{
"id": 26,
"text": "(",
"logprob": -0.30371094
"logprob": -0.22705078,
"text": "("
},
{
"id": 62,
"text": "L",
"logprob": -5.6054688
"logprob": -5.2304688,
"text": "L"
},
{
"id": 44,
"text": ":",
"logprob": -3.0722656
"logprob": -3.0976562,
"text": ":"
},
{
"id": 1682,
"text": " List",
"logprob": -0.6879883
"logprob": -1.1044922,
"text": " List"
},
{
"id": 77,
"text": "[",
"logprob": -0.38500977
"logprob": -0.14294434,
"text": "["
},
{
"id": 1808,
"text": "float",
"logprob": -0.984375
"logprob": -0.32299805,
"text": "float"
},
{
"id": 10794,
"text": "]):",
"logprob": -2.5351562
"logprob": -2.8164062,
"text": "]):"
}
],
"seed": null,
"tokens": [
{
"id": 284,
"text": "\n ",
"logprob": -1.1738281,
"special": false
"logprob": -0.1282959,
"special": false,
"text": "\n "
},
{
"id": 442,
"text": " return",
"logprob": -0.95947266,
"special": false
"id": 1524,
"logprob": -0.97998047,
"special": false,
"text": " \"\"\""
},
{
"id": 3632,
"text": " sum",
"logprob": -1.4199219,
"special": false
"id": 284,
"logprob": -0.7006836,
"special": false,
"text": "\n "
},
{
"id": 26,
"text": "(",
"logprob": -0.085876465,
"special": false
"id": 14883,
"logprob": -2.1933594,
"special": false,
"text": " Calculate"
},
{
"id": 62,
"text": "L",
"logprob": -0.09875488,
"special": false
},
{
"id": 27,
"text": ")",
"logprob": -0.30517578,
"special": false
},
{
"id": 517,
"text": " /",
"logprob": -0.42089844,
"special": false
},
{
"id": 2069,
"text": " len",
"logprob": -0.042053223,
"special": false
},
{
"id": 26,
"text": "(",
"logprob": -0.0011806488,
"special": false
},
{
"id": 62,
"text": "L",
"logprob": -0.0005259514,
"special": false
},
{
"id": 27,
"text": ")",
"logprob": -0.0017633438,
"special": false
},
{
"id": 478,
"text": "\n\n",
"logprob": -0.69189453,
"special": false
},
{
"id": 203,
"text": "\n",
"logprob": -0.041870117,
"special": false
},
{
"id": 589,
"text": "def",
"logprob": -0.27856445,
"special": false
"id": 322,
"logprob": -0.2697754,
"special": false,
"text": " the"
},
{
"id": 3226,
"text": " ge",
"logprob": -1.7255859,
"special": false
"logprob": -0.0836792,
"special": false,
"text": " ge"
},
{
"id": 21017,
"text": "ometric",
"logprob": -0.011291504,
"special": false
"logprob": -0.018737793,
"special": false,
"text": "ometric"
},
{
"id": 81,
"text": "_",
"logprob": -0.008430481,
"special": false
"id": 5651,
"logprob": -0.028640747,
"special": false,
"text": " mean"
},
{
"id": 6009,
"text": "mean",
"logprob": -0.025787354,
"special": false
"id": 432,
"logprob": -0.29467773,
"special": false,
"text": " of"
},
{
"id": 26,
"text": "(",
"logprob": -0.073913574,
"special": false
"id": 312,
"logprob": -0.31518555,
"special": false,
"text": " a"
},
{
"id": 62,
"text": "L",
"logprob": -0.09967041,
"special": false
"id": 1149,
"logprob": -0.20605469,
"special": false,
"text": " list"
},
{
"id": 432,
"logprob": -0.23254395,
"special": false,
"text": " of"
},
{
"id": 7515,
"logprob": -0.4489746,
"special": false,
"text": " numbers"
},
{
"id": 32,
"logprob": -0.6044922,
"special": false,
"text": "."
},
{
"id": 446,
"logprob": -0.63964844,
"special": false,
"text": "\n\n "
},
{
"id": 499,
"logprob": -1.1953125,
"special": false,
"text": " :"
},
{
"id": 753,
"logprob": -0.03515625,
"special": false,
"text": "param"
},
{
"id": 498,
"logprob": -0.06311035,
"special": false,
"text": " L"
},
{
"id": 44,
"logprob": -0.003414154,
"special": false,
"text": ":"
},
{
"id": 1682,
"logprob": -1.3310547,
"special": false,
"text": " List"
}
]
}
],
"top_tokens": null
},
"generated_text": "\n \"\"\"\n Calculate the geometric mean of a list of numbers.\n\n :param L: List"
}

View File

@ -11,57 +11,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.09375,
"logprob": -7.5898438,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25976562,
"logprob": -0.26586914,
"text": "_"
},
{
"id": 6009,
"logprob": -2.2148438,
"logprob": -1.6347656,
"text": "mean"
},
{
"id": 26,
"logprob": -0.3010254,
"logprob": -0.22705078,
"text": "("
},
{
"id": 62,
"logprob": -5.6757812,
"logprob": -5.2382812,
"text": "L"
},
{
"id": 44,
"logprob": -3.0898438,
"logprob": -3.0996094,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6791992,
"logprob": -1.1025391,
"text": " List"
},
{
"id": 77,
"logprob": -0.38891602,
"logprob": -0.14294434,
"text": "["
},
{
"id": 1808,
"logprob": -0.92041016,
"logprob": -0.32226562,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5390625,
"logprob": -2.8164062,
"text": "]):"
}
],
@ -75,13 +75,13 @@
},
{
"id": 442,
"logprob": 0.0,
"logprob": -1.3134766,
"special": false,
"text": " return"
},
{
"id": 11665,
"logprob": -1.6005859,
"logprob": -0.10021973,
"special": false,
"text": " reduce"
},
@ -129,7 +129,7 @@
},
{
"id": 319,
"logprob": 0.0,
"logprob": -0.42871094,
"special": false,
"text": " *"
},
@ -158,36 +158,37 @@
"text": ")"
},
{
"id": 203,
"logprob": -0.11968994,
"special": false,
"text": "\n"
},
{
"id": 203,
"id": 1115,
"logprob": 0.0,
"special": false,
"text": "\n"
"text": " **"
},
{
"id": 589,
"id": 308,
"logprob": 0.0,
"special": false,
"text": "def"
"text": " ("
},
{
"id": 3226,
"id": 35,
"logprob": 0.0,
"special": false,
"text": " ge"
"text": "1"
},
{
"id": 21017,
"id": 32,
"logprob": -0.31323242,
"special": false,
"text": "."
},
{
"id": 34,
"logprob": 0.0,
"special": false,
"text": "ometric"
"text": "0"
}
]
],
"top_tokens": null
},
"generated_text": "\n return reduce(lambda x, y: x * y, L)\n\ndef geometric"
"generated_text": "\n return reduce(lambda x, y: x * y, L) ** (1.0"
}

View File

@ -12,57 +12,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -7.5820312,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25927734,
"logprob": -0.26708984,
"text": "_"
},
{
"id": 6009,
"logprob": -2.25,
"logprob": -1.6386719,
"text": "mean"
},
{
"id": 26,
"logprob": -0.30126953,
"logprob": -0.22717285,
"text": "("
},
{
"id": 62,
"logprob": -5.7539062,
"logprob": -5.234375,
"text": "L"
},
{
"id": 44,
"logprob": -3.0878906,
"logprob": -3.1015625,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6845703,
"logprob": -1.1083984,
"text": " List"
},
{
"id": 77,
"logprob": -0.3918457,
"logprob": -0.14294434,
"text": "["
},
{
"id": 1808,
"logprob": -0.8798828,
"logprob": -0.32592773,
"text": "float"
},
{
"id": 10794,
"logprob": -2.4980469,
"logprob": -2.8164062,
"text": "]):"
}
],
@ -70,67 +70,68 @@
"tokens": [
{
"id": 284,
"logprob": -1.1533203,
"logprob": -0.12817383,
"special": false,
"text": "\n "
},
{
"id": 442,
"logprob": -0.91796875,
"id": 1524,
"logprob": -0.9863281,
"special": false,
"text": " return"
"text": " \"\"\""
},
{
"id": 3632,
"logprob": -1.3291016,
"id": 284,
"logprob": -0.7011719,
"special": false,
"text": " sum"
"text": "\n "
},
{
"id": 26,
"logprob": -0.08062744,
"id": 14883,
"logprob": -2.2050781,
"special": false,
"text": "("
"text": " Calculate"
},
{
"id": 62,
"logprob": -0.097717285,
"id": 322,
"logprob": -0.2668457,
"special": false,
"text": "L"
"text": " the"
},
{
"id": 27,
"logprob": -0.29003906,
"id": 3226,
"logprob": -0.08465576,
"special": false,
"text": ")"
"text": " ge"
},
{
"id": 517,
"logprob": -0.34958984,
"id": 21017,
"logprob": -0.019012451,
"special": false,
"text": " /"
"text": "ometric"
},
{
"id": 2069,
"logprob": -0.03829956,
"id": 5651,
"logprob": -0.028625488,
"special": false,
"text": " len"
"text": " mean"
},
{
"id": 26,
"logprob": -0.0011987686,
"id": 432,
"logprob": -0.29418945,
"special": false,
"text": "("
"text": " of"
},
{
"id": 62,
"logprob": -0.00050878525,
"id": 312,
"logprob": -0.3161621,
"special": false,
"text": "L"
"text": " a"
}
]
],
"top_tokens": null
},
"generated_text": "\n return sum(L) / len(L"
"generated_text": "\n \"\"\"\n Calculate the geometric mean of a"
},
{
"details": {
@ -145,57 +146,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -7.59375,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25878906,
"logprob": -0.26953125,
"text": "_"
},
{
"id": 6009,
"logprob": -2.2109375,
"logprob": -1.640625,
"text": "mean"
},
{
"id": 26,
"logprob": -0.30371094,
"logprob": -0.22705078,
"text": "("
},
{
"id": 62,
"logprob": -5.6054688,
"logprob": -5.234375,
"text": "L"
},
{
"id": 44,
"logprob": -3.0722656,
"logprob": -3.1132812,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6879883,
"logprob": -1.1123047,
"text": " List"
},
{
"id": 77,
"logprob": -0.38500977,
"logprob": -0.14294434,
"text": "["
},
{
"id": 1808,
"logprob": -0.984375,
"logprob": -0.32299805,
"text": "float"
},
{
"id": 10794,
"logprob": -2.5351562,
"logprob": -2.8164062,
"text": "]):"
}
],
@ -203,67 +204,68 @@
"tokens": [
{
"id": 284,
"logprob": -1.1738281,
"logprob": -0.12854004,
"special": false,
"text": "\n "
},
{
"id": 442,
"logprob": -0.9584961,
"id": 1524,
"logprob": -0.9897461,
"special": false,
"text": " return"
"text": " \"\"\""
},
{
"id": 3632,
"logprob": -1.4169922,
"id": 284,
"logprob": -0.69970703,
"special": false,
"text": " sum"
"text": "\n "
},
{
"id": 26,
"logprob": -0.085876465,
"id": 14883,
"logprob": -2.2050781,
"special": false,
"text": "("
"text": " Calculate"
},
{
"id": 62,
"logprob": -0.0982666,
"id": 322,
"logprob": -0.2668457,
"special": false,
"text": "L"
"text": " the"
},
{
"id": 27,
"logprob": -0.3022461,
"id": 3226,
"logprob": -0.08496094,
"special": false,
"text": ")"
"text": " ge"
},
{
"id": 517,
"logprob": -0.40504883,
"id": 21017,
"logprob": -0.019012451,
"special": false,
"text": " /"
"text": "ometric"
},
{
"id": 2069,
"logprob": -0.041656494,
"id": 5651,
"logprob": -0.029037476,
"special": false,
"text": " len"
"text": " mean"
},
{
"id": 26,
"logprob": -0.0011844635,
"id": 432,
"logprob": -0.2939453,
"special": false,
"text": "("
"text": " of"
},
{
"id": 62,
"logprob": -0.0005264282,
"id": 312,
"logprob": -0.31591797,
"special": false,
"text": "L"
"text": " a"
}
]
],
"top_tokens": null
},
"generated_text": "\n return sum(L) / len(L"
"generated_text": "\n \"\"\"\n Calculate the geometric mean of a"
},
{
"details": {
@ -278,57 +280,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -7.5859375,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25927734,
"logprob": -0.26586914,
"text": "_"
},
{
"id": 6009,
"logprob": -2.25,
"logprob": -1.6347656,
"text": "mean"
},
{
"id": 26,
"logprob": -0.30126953,
"logprob": -0.22766113,
"text": "("
},
{
"id": 62,
"logprob": -5.7539062,
"logprob": -5.2265625,
"text": "L"
},
{
"id": 44,
"logprob": -3.0878906,
"logprob": -3.0976562,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6845703,
"logprob": -1.1025391,
"text": " List"
},
{
"id": 77,
"logprob": -0.3918457,
"logprob": -0.1427002,
"text": "["
},
{
"id": 1808,
"logprob": -0.8798828,
"logprob": -0.32592773,
"text": "float"
},
{
"id": 10794,
"logprob": -2.4980469,
"logprob": -2.8164062,
"text": "]):"
}
],
@ -336,67 +338,68 @@
"tokens": [
{
"id": 284,
"logprob": -1.1533203,
"logprob": -0.13012695,
"special": false,
"text": "\n "
},
{
"id": 442,
"logprob": -0.9165039,
"id": 1524,
"logprob": -0.98046875,
"special": false,
"text": " return"
"text": " \"\"\""
},
{
"id": 3632,
"logprob": -1.328125,
"id": 284,
"logprob": -0.69921875,
"special": false,
"text": " sum"
"text": "\n "
},
{
"id": 26,
"logprob": -0.07946777,
"id": 14883,
"logprob": -2.1992188,
"special": false,
"text": "("
"text": " Calculate"
},
{
"id": 62,
"logprob": -0.09820557,
"id": 322,
"logprob": -0.2668457,
"special": false,
"text": "L"
"text": " the"
},
{
"id": 27,
"logprob": -0.28930664,
"id": 3226,
"logprob": -0.083496094,
"special": false,
"text": ")"
"text": " ge"
},
{
"id": 517,
"logprob": -0.34592773,
"id": 21017,
"logprob": -0.01902771,
"special": false,
"text": " /"
"text": "ometric"
},
{
"id": 2069,
"logprob": -0.038330078,
"id": 5651,
"logprob": -0.029006958,
"special": false,
"text": " len"
"text": " mean"
},
{
"id": 26,
"logprob": -0.0011940002,
"id": 432,
"logprob": -0.29248047,
"special": false,
"text": "("
"text": " of"
},
{
"id": 62,
"logprob": -0.00050878525,
"id": 312,
"logprob": -0.3161621,
"special": false,
"text": "L"
"text": " a"
}
]
],
"top_tokens": null
},
"generated_text": "\n return sum(L) / len(L"
"generated_text": "\n \"\"\"\n Calculate the geometric mean of a"
},
{
"details": {
@ -411,57 +414,57 @@
},
{
"id": 3226,
"logprob": -9.0234375,
"logprob": -8.5859375,
"text": " ge"
},
{
"id": 21017,
"logprob": -9.0859375,
"logprob": -7.5859375,
"text": "ometric"
},
{
"id": 81,
"logprob": -0.25927734,
"logprob": -0.26904297,
"text": "_"
},
{
"id": 6009,
"logprob": -2.25,
"logprob": -1.6386719,
"text": "mean"
},
{
"id": 26,
"logprob": -0.30126953,
"logprob": -0.22705078,
"text": "("
},
{
"id": 62,
"logprob": -5.7539062,
"logprob": -5.234375,
"text": "L"
},
{
"id": 44,
"logprob": -3.0878906,
"logprob": -3.1132812,
"text": ":"
},
{
"id": 1682,
"logprob": -0.6845703,
"logprob": -1.1074219,
"text": " List"
},
{
"id": 77,
"logprob": -0.3918457,
"logprob": -0.14477539,
"text": "["
},
{
"id": 1808,
"logprob": -0.8798828,
"logprob": -0.3256836,
"text": "float"
},
{
"id": 10794,
"logprob": -2.4980469,
"logprob": -2.8027344,
"text": "]):"
}
],
@ -469,66 +472,67 @@
"tokens": [
{
"id": 284,
"logprob": -1.1533203,
"logprob": -0.12915039,
"special": false,
"text": "\n "
},
{
"id": 442,
"logprob": -0.91259766,
"id": 1524,
"logprob": -0.98535156,
"special": false,
"text": " return"
"text": " \"\"\""
},
{
"id": 3632,
"logprob": -1.3251953,
"id": 284,
"logprob": -0.69921875,
"special": false,
"text": " sum"
"text": "\n "
},
{
"id": 26,
"logprob": -0.08062744,
"id": 14883,
"logprob": -2.2011719,
"special": false,
"text": "("
"text": " Calculate"
},
{
"id": 62,
"logprob": -0.09906006,
"id": 322,
"logprob": -0.26708984,
"special": false,
"text": "L"
"text": " the"
},
{
"id": 27,
"logprob": -0.28979492,
"id": 3226,
"logprob": -0.08502197,
"special": false,
"text": ")"
"text": " ge"
},
{
"id": 517,
"logprob": -0.35958984,
"id": 21017,
"logprob": -0.019012451,
"special": false,
"text": " /"
"text": "ometric"
},
{
"id": 2069,
"logprob": -0.038604736,
"id": 5651,
"logprob": -0.028625488,
"special": false,
"text": " len"
"text": " mean"
},
{
"id": 26,
"logprob": -0.0011901855,
"id": 432,
"logprob": -0.29589844,
"special": false,
"text": "("
"text": " of"
},
{
"id": 62,
"logprob": -0.0005078316,
"id": 312,
"logprob": -0.31591797,
"special": false,
"text": "L"
"text": " a"
}
]
],
"top_tokens": null
},
"generated_text": "\n return sum(L) / len(L"
"generated_text": "\n \"\"\"\n Calculate the geometric mean of a"
}
]

View File

@ -0,0 +1,274 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 30,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 5235,
"logprob": -10.0625,
"text": "info"
},
{
"id": 29901,
"logprob": -3.2324219,
"text": ":"
},
{
"id": 13260,
"logprob": -10.625,
"text": "dav"
},
{
"id": 333,
"logprob": -0.08276367,
"text": "id"
},
{
"id": 8753,
"logprob": -7.5273438,
"text": "hol"
},
{
"id": 17559,
"logprob": -3.8476562,
"text": "tz"
},
{
"id": 763,
"logprob": -10.140625,
"text": "like"
},
{
"id": 10697,
"logprob": -10.1953125,
"text": "trees"
},
{
"id": 322,
"logprob": -2.5742188,
"text": "and"
},
{
"id": 756,
"logprob": -7.4882812,
"text": "has"
},
{
"id": 1023,
"logprob": -5.0507812,
"text": "two"
},
{
"id": 274,
"logprob": -5.3164062,
"text": "c"
},
{
"id": 1446,
"logprob": -0.6694336,
"text": "ats"
},
{
"id": 29889,
"logprob": -0.9995117,
"text": "."
},
{
"id": 29871,
"logprob": -4.2421875,
"text": ""
}
],
"seed": null,
"tokens": [
{
"id": 6377,
"logprob": -0.14916992,
"special": false,
"text": "{\""
},
{
"id": 29888,
"logprob": -0.13598633,
"special": false,
"text": "f"
},
{
"id": 12935,
"logprob": -0.017669678,
"special": false,
"text": "irs"
},
{
"id": 29873,
"logprob": -0.00085639954,
"special": false,
"text": "t"
},
{
"id": 1170,
"logprob": -0.0054016113,
"special": false,
"text": "Name"
},
{
"id": 4710,
"logprob": -0.13549805,
"special": false,
"text": "\":\""
},
{
"id": 19504,
"logprob": -0.8852539,
"special": false,
"text": "David"
},
{
"id": 3284,
"logprob": -0.16394043,
"special": false,
"text": "\",\""
},
{
"id": 29882,
"logprob": -0.08862305,
"special": false,
"text": "h"
},
{
"id": 711,
"logprob": -0.66259766,
"special": false,
"text": "ob"
},
{
"id": 1609,
"logprob": -5.51939e-05,
"special": false,
"text": "by"
},
{
"id": 4710,
"logprob": -0.23120117,
"special": false,
"text": "\":\""
},
{
"id": 29911,
"logprob": -2.3730469,
"special": false,
"text": "T"
},
{
"id": 11003,
"logprob": -0.032104492,
"special": false,
"text": "rees"
},
{
"id": 3284,
"logprob": -0.22021484,
"special": false,
"text": "\",\""
},
{
"id": 4230,
"logprob": -0.06726074,
"special": false,
"text": "last"
},
{
"id": 1170,
"logprob": -0.003501892,
"special": false,
"text": "Name"
},
{
"id": 4710,
"logprob": -0.0045661926,
"special": false,
"text": "\":\""
},
{
"id": 29950,
"logprob": -0.12512207,
"special": false,
"text": "H"
},
{
"id": 14339,
"logprob": -0.009552002,
"special": false,
"text": "olt"
},
{
"id": 29920,
"logprob": -0.00042438507,
"special": false,
"text": "z"
},
{
"id": 3284,
"logprob": -0.11651611,
"special": false,
"text": "\",\""
},
{
"id": 29876,
"logprob": -0.29736328,
"special": false,
"text": "n"
},
{
"id": 398,
"logprob": -0.003030777,
"special": false,
"text": "um"
},
{
"id": 29907,
"logprob": -0.3774414,
"special": false,
"text": "C"
},
{
"id": 1446,
"logprob": -0.0003130436,
"special": false,
"text": "ats"
},
{
"id": 1115,
"logprob": -0.0021514893,
"special": false,
"text": "\":"
},
{
"id": 29906,
"logprob": -0.071899414,
"special": false,
"text": "2"
},
{
"id": 29913,
"logprob": -0.018997192,
"special": false,
"text": "}"
},
{
"id": 2,
"logprob": 0.0,
"special": true,
"text": "</s>"
}
],
"top_tokens": null
},
"generated_text": "{\"firstName\":\"David\",\"hobby\":\"Trees\",\"lastName\":\"Holtz\",\"numCats\":2}"
}

View File

@ -11,92 +11,92 @@
},
{
"id": 4911,
"logprob": -5.7851562,
"logprob": -6.9765625,
"text": "User"
},
{
"id": 29901,
"logprob": -0.006996155,
"logprob": -0.0059432983,
"text": ":"
},
{
"id": 32000,
"logprob": -0.81347656,
"logprob": -0.8408203,
"text": "<fake_token_around_image>"
},
{
"id": 32001,
"logprob": -6.687641e-05,
"logprob": -9.906292e-05,
"text": "<image>"
},
{
"id": 32000,
"logprob": -3.5762787e-07,
"logprob": -2.3841858e-07,
"text": "<fake_token_around_image>"
},
{
"id": 1815,
"logprob": -4.2148438,
"logprob": -4.1679688,
"text": "Can"
},
{
"id": 366,
"logprob": -0.014137268,
"logprob": -0.014099121,
"text": "you"
},
{
"id": 2649,
"logprob": -4.4335938,
"logprob": -4.4609375,
"text": "tell"
},
{
"id": 592,
"logprob": -0.2919922,
"logprob": -0.29882812,
"text": "me"
},
{
"id": 263,
"logprob": -4.2070312,
"logprob": -4.1445312,
"text": "a"
},
{
"id": 1407,
"logprob": -9.421875,
"logprob": -9.3828125,
"text": "very"
},
{
"id": 3273,
"logprob": -1.8720703,
"logprob": -1.9736328,
"text": "short"
},
{
"id": 5828,
"logprob": -0.26489258,
"logprob": -0.2800293,
"text": "story"
},
{
"id": 2729,
"logprob": -3.7441406,
"logprob": -3.5625,
"text": "based"
},
{
"id": 373,
"logprob": -0.0005393028,
"logprob": -0.0006427765,
"text": "on"
},
{
"id": 278,
"logprob": -0.140625,
"logprob": -0.13952637,
"text": "the"
},
{
"id": 1967,
"logprob": -0.06756592,
"logprob": -0.068115234,
"text": "image"
},
{
"id": 29973,
"logprob": -0.15454102,
"logprob": -0.16357422,
"text": "?"
}
],
@ -104,25 +104,25 @@
"tokens": [
{
"id": 32002,
"logprob": -0.0019140244,
"logprob": -0.0026474,
"special": true,
"text": "<end_of_utterance>"
},
{
"id": 29871,
"logprob": -8.404255e-05,
"logprob": -8.547306e-05,
"special": false,
"text": " "
},
{
"id": 13,
"logprob": -1.7642975e-05,
"logprob": -1.7881393e-05,
"special": false,
"text": "\n"
},
{
"id": 7900,
"logprob": -2.9802322e-06,
"logprob": -3.0994415e-06,
"special": false,
"text": "Ass"
},
@ -140,30 +140,29 @@
},
{
"id": 319,
"logprob": -0.91064453,
"logprob": -0.92529297,
"special": false,
"text": " A"
},
{
"id": 696,
"logprob": -1.2412109,
"logprob": -1.1269531,
"special": false,
"text": " ro"
},
{
"id": 15664,
"logprob": -0.0002439022,
"logprob": -0.00029492378,
"special": false,
"text": "oster"
},
{
"id": 15028,
"logprob": -1.1630859,
"logprob": -1.1855469,
"special": false,
"text": " stands"
}
],
"top_tokens": null
]
},
"generated_text": " \nAssistant: A rooster stands"
}

View File

@ -12,92 +12,92 @@
},
{
"id": 4911,
"logprob": -5.7851562,
"logprob": -6.9804688,
"text": "User"
},
{
"id": 29901,
"logprob": -0.006996155,
"logprob": -0.006122589,
"text": ":"
},
{
"id": 32000,
"logprob": -0.81347656,
"logprob": -0.8417969,
"text": "<fake_token_around_image>"
},
{
"id": 32001,
"logprob": -6.687641e-05,
"logprob": -9.918213e-05,
"text": "<image>"
},
{
"id": 32000,
"logprob": -3.5762787e-07,
"logprob": -2.3841858e-07,
"text": "<fake_token_around_image>"
},
{
"id": 1815,
"logprob": -4.2148438,
"logprob": -4.1679688,
"text": "Can"
},
{
"id": 366,
"logprob": -0.014137268,
"logprob": -0.014091492,
"text": "you"
},
{
"id": 2649,
"logprob": -4.4335938,
"logprob": -4.4726562,
"text": "tell"
},
{
"id": 592,
"logprob": -0.2919922,
"logprob": -0.2998047,
"text": "me"
},
{
"id": 263,
"logprob": -4.2070312,
"logprob": -4.15625,
"text": "a"
},
{
"id": 1407,
"logprob": -9.421875,
"logprob": -9.3828125,
"text": "very"
},
{
"id": 3273,
"logprob": -1.8720703,
"logprob": -1.9716797,
"text": "short"
},
{
"id": 5828,
"logprob": -0.26489258,
"logprob": -0.27734375,
"text": "story"
},
{
"id": 2729,
"logprob": -3.7441406,
"logprob": -3.5605469,
"text": "based"
},
{
"id": 373,
"logprob": -0.0005393028,
"logprob": -0.00064468384,
"text": "on"
},
{
"id": 278,
"logprob": -0.140625,
"logprob": -0.14160156,
"text": "the"
},
{
"id": 1967,
"logprob": -0.06756592,
"logprob": -0.06915283,
"text": "image"
},
{
"id": 29973,
"logprob": -0.15454102,
"logprob": -0.16381836,
"text": "?"
}
],
@ -105,19 +105,19 @@
"tokens": [
{
"id": 32002,
"logprob": -0.0019140244,
"logprob": -0.0026664734,
"special": true,
"text": "<end_of_utterance>"
},
{
"id": 29871,
"logprob": -8.392334e-05,
"logprob": -8.583069e-05,
"special": false,
"text": " "
},
{
"id": 13,
"logprob": -1.7881393e-05,
"logprob": -1.8119812e-05,
"special": false,
"text": "\n"
},
@ -135,36 +135,35 @@
},
{
"id": 29901,
"logprob": -3.0994415e-06,
"logprob": -3.2186508e-06,
"special": false,
"text": ":"
},
{
"id": 319,
"logprob": -0.9057617,
"logprob": -0.9301758,
"special": false,
"text": " A"
},
{
"id": 696,
"logprob": -1.2294922,
"logprob": -1.1279297,
"special": false,
"text": " ro"
},
{
"id": 15664,
"logprob": -0.00024533272,
"logprob": -0.0002939701,
"special": false,
"text": "oster"
},
{
"id": 15028,
"logprob": -1.1640625,
"logprob": -1.1865234,
"special": false,
"text": " stands"
}
],
"top_tokens": null
]
},
"generated_text": " \nAssistant: A rooster stands"
},
@ -181,92 +180,92 @@
},
{
"id": 4911,
"logprob": -5.7773438,
"logprob": -6.9804688,
"text": "User"
},
{
"id": 29901,
"logprob": -0.0070114136,
"logprob": -0.006122589,
"text": ":"
},
{
"id": 32000,
"logprob": -0.8208008,
"logprob": -0.8417969,
"text": "<fake_token_around_image>"
},
{
"id": 32001,
"logprob": -6.699562e-05,
"logprob": -9.942055e-05,
"text": "<image>"
},
{
"id": 32000,
"logprob": -3.5762787e-07,
"logprob": -2.3841858e-07,
"text": "<fake_token_around_image>"
},
{
"id": 1815,
"logprob": -4.2265625,
"logprob": -4.1679688,
"text": "Can"
},
{
"id": 366,
"logprob": -0.014175415,
"logprob": -0.014091492,
"text": "you"
},
{
"id": 2649,
"logprob": -4.4296875,
"logprob": -4.4726562,
"text": "tell"
},
{
"id": 592,
"logprob": -0.29516602,
"logprob": -0.2998047,
"text": "me"
},
{
"id": 263,
"logprob": -4.2109375,
"logprob": -4.15625,
"text": "a"
},
{
"id": 1407,
"logprob": -9.4296875,
"logprob": -9.3828125,
"text": "very"
},
{
"id": 3273,
"logprob": -1.8720703,
"logprob": -1.9716797,
"text": "short"
},
{
"id": 5828,
"logprob": -0.26879883,
"logprob": -0.27734375,
"text": "story"
},
{
"id": 2729,
"logprob": -3.7675781,
"logprob": -3.5605469,
"text": "based"
},
{
"id": 373,
"logprob": -0.0005354881,
"logprob": -0.0006451607,
"text": "on"
},
{
"id": 278,
"logprob": -0.13671875,
"logprob": -0.14160156,
"text": "the"
},
{
"id": 1967,
"logprob": -0.06719971,
"logprob": -0.06915283,
"text": "image"
},
{
"id": 29973,
"logprob": -0.15551758,
"logprob": -0.16381836,
"text": "?"
}
],
@ -274,19 +273,19 @@
"tokens": [
{
"id": 32002,
"logprob": -0.0019130707,
"logprob": -0.0026664734,
"special": true,
"text": "<end_of_utterance>"
},
{
"id": 29871,
"logprob": -8.392334e-05,
"logprob": -8.571148e-05,
"special": false,
"text": " "
},
{
"id": 13,
"logprob": -1.7881393e-05,
"logprob": -1.8119812e-05,
"special": false,
"text": "\n"
},
@ -310,30 +309,29 @@
},
{
"id": 319,
"logprob": -0.9013672,
"logprob": -0.9301758,
"special": false,
"text": " A"
},
{
"id": 696,
"logprob": -1.2324219,
"logprob": -1.1279297,
"special": false,
"text": " ro"
},
{
"id": 15664,
"logprob": -0.0002477169,
"logprob": -0.0002939701,
"special": false,
"text": "oster"
},
{
"id": 15028,
"logprob": -1.1660156,
"logprob": -1.1865234,
"special": false,
"text": " stands"
}
],
"top_tokens": null
]
},
"generated_text": " \nAssistant: A rooster stands"
},
@ -350,92 +348,92 @@
},
{
"id": 4911,
"logprob": -5.7773438,
"logprob": -6.9804688,
"text": "User"
},
{
"id": 29901,
"logprob": -0.0070114136,
"logprob": -0.006122589,
"text": ":"
},
{
"id": 32000,
"logprob": -0.8208008,
"logprob": -0.8417969,
"text": "<fake_token_around_image>"
},
{
"id": 32001,
"logprob": -6.699562e-05,
"logprob": -9.918213e-05,
"text": "<image>"
},
{
"id": 32000,
"logprob": -3.5762787e-07,
"logprob": -2.3841858e-07,
"text": "<fake_token_around_image>"
},
{
"id": 1815,
"logprob": -4.2265625,
"logprob": -4.1679688,
"text": "Can"
},
{
"id": 366,
"logprob": -0.014175415,
"logprob": -0.014091492,
"text": "you"
},
{
"id": 2649,
"logprob": -4.4296875,
"logprob": -4.4726562,
"text": "tell"
},
{
"id": 592,
"logprob": -0.29516602,
"logprob": -0.2998047,
"text": "me"
},
{
"id": 263,
"logprob": -4.2109375,
"logprob": -4.15625,
"text": "a"
},
{
"id": 1407,
"logprob": -9.4296875,
"logprob": -9.3828125,
"text": "very"
},
{
"id": 3273,
"logprob": -1.8720703,
"logprob": -1.9716797,
"text": "short"
},
{
"id": 5828,
"logprob": -0.26879883,
"logprob": -0.27734375,
"text": "story"
},
{
"id": 2729,
"logprob": -3.7675781,
"logprob": -3.5605469,
"text": "based"
},
{
"id": 373,
"logprob": -0.0005354881,
"logprob": -0.00064468384,
"text": "on"
},
{
"id": 278,
"logprob": -0.13671875,
"logprob": -0.14160156,
"text": "the"
},
{
"id": 1967,
"logprob": -0.06719971,
"logprob": -0.06915283,
"text": "image"
},
{
"id": 29973,
"logprob": -0.15551758,
"logprob": -0.16381836,
"text": "?"
}
],
@ -443,19 +441,19 @@
"tokens": [
{
"id": 32002,
"logprob": -0.001912117,
"logprob": -0.0026664734,
"special": true,
"text": "<end_of_utterance>"
},
{
"id": 29871,
"logprob": -8.392334e-05,
"logprob": -8.59499e-05,
"special": false,
"text": " "
},
{
"id": 13,
"logprob": -1.7762184e-05,
"logprob": -1.8119812e-05,
"special": false,
"text": "\n"
},
@ -479,30 +477,29 @@
},
{
"id": 319,
"logprob": -0.9013672,
"logprob": -0.9301758,
"special": false,
"text": " A"
},
{
"id": 696,
"logprob": -1.2324219,
"logprob": -1.1279297,
"special": false,
"text": " ro"
},
{
"id": 15664,
"logprob": -0.0002477169,
"logprob": -0.0002939701,
"special": false,
"text": "oster"
},
{
"id": 15028,
"logprob": -1.1660156,
"logprob": -1.1865234,
"special": false,
"text": " stands"
}
],
"top_tokens": null
]
},
"generated_text": " \nAssistant: A rooster stands"
},
@ -519,92 +516,92 @@
},
{
"id": 4911,
"logprob": -5.7773438,
"logprob": -6.9804688,
"text": "User"
},
{
"id": 29901,
"logprob": -0.0070114136,
"logprob": -0.006122589,
"text": ":"
},
{
"id": 32000,
"logprob": -0.8208008,
"logprob": -0.8417969,
"text": "<fake_token_around_image>"
},
{
"id": 32001,
"logprob": -6.699562e-05,
"logprob": -9.942055e-05,
"text": "<image>"
},
{
"id": 32000,
"logprob": -3.5762787e-07,
"logprob": -2.3841858e-07,
"text": "<fake_token_around_image>"
},
{
"id": 1815,
"logprob": -4.2265625,
"logprob": -4.1679688,
"text": "Can"
},
{
"id": 366,
"logprob": -0.014175415,
"logprob": -0.014091492,
"text": "you"
},
{
"id": 2649,
"logprob": -4.4296875,
"logprob": -4.4726562,
"text": "tell"
},
{
"id": 592,
"logprob": -0.29516602,
"logprob": -0.2998047,
"text": "me"
},
{
"id": 263,
"logprob": -4.2109375,
"logprob": -4.15625,
"text": "a"
},
{
"id": 1407,
"logprob": -9.4296875,
"logprob": -9.3828125,
"text": "very"
},
{
"id": 3273,
"logprob": -1.8720703,
"logprob": -1.9716797,
"text": "short"
},
{
"id": 5828,
"logprob": -0.26879883,
"logprob": -0.27734375,
"text": "story"
},
{
"id": 2729,
"logprob": -3.7675781,
"logprob": -3.5605469,
"text": "based"
},
{
"id": 373,
"logprob": -0.0005354881,
"logprob": -0.0006451607,
"text": "on"
},
{
"id": 278,
"logprob": -0.13671875,
"logprob": -0.14160156,
"text": "the"
},
{
"id": 1967,
"logprob": -0.06719971,
"logprob": -0.06915283,
"text": "image"
},
{
"id": 29973,
"logprob": -0.15551758,
"logprob": -0.16381836,
"text": "?"
}
],
@ -612,19 +609,19 @@
"tokens": [
{
"id": 32002,
"logprob": -0.001912117,
"logprob": -0.0026664734,
"special": true,
"text": "<end_of_utterance>"
},
{
"id": 29871,
"logprob": -8.392334e-05,
"logprob": -8.571148e-05,
"special": false,
"text": " "
},
{
"id": 13,
"logprob": -1.7762184e-05,
"logprob": -1.8119812e-05,
"special": false,
"text": "\n"
},
@ -648,30 +645,29 @@
},
{
"id": 319,
"logprob": -0.9013672,
"logprob": -0.9301758,
"special": false,
"text": " A"
},
{
"id": 696,
"logprob": -1.2324219,
"logprob": -1.1279297,
"special": false,
"text": " ro"
},
{
"id": 15664,
"logprob": -0.0002477169,
"logprob": -0.0002939701,
"special": false,
"text": "oster"
},
{
"id": 15028,
"logprob": -1.1660156,
"logprob": -1.1865234,
"special": false,
"text": " stands"
}
],
"top_tokens": null
]
},
"generated_text": " \nAssistant: A rooster stands"
}

View File

@ -0,0 +1,65 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "stop_sequence",
"generated_tokens": 6,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 3735,
"logprob": -10.5,
"text": "Test"
},
{
"id": 2159,
"logprob": -12.140625,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": -1.0654297,
"special": false,
"text": "\n"
},
{
"id": 1014,
"logprob": -2.7460938,
"special": false,
"text": "The"
},
{
"id": 6032,
"logprob": -1.359375,
"special": false,
"text": " purpose"
},
{
"id": 302,
"logprob": 0.0,
"special": false,
"text": " of"
},
{
"id": 456,
"logprob": 0.0,
"special": false,
"text": " this"
},
{
"id": 1369,
"logprob": -0.40063477,
"special": false,
"text": " test"
}
],
"top_tokens": null
},
"generated_text": "Test request\nThe purpose of this test"
}

View File

@ -0,0 +1,73 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.00756073,
"special": false,
"text": "\n"
},
{
"id": 13,
"logprob": -0.20117188,
"special": false,
"text": "\n"
},
{
"id": 16114,
"logprob": -1.2597656,
"special": false,
"text": "Once"
},
{
"id": 3714,
"logprob": -0.20825195,
"special": false,
"text": " upon"
},
{
"id": 264,
"logprob": -0.00178051,
"special": false,
"text": " a"
},
{
"id": 727,
"logprob": -0.011955261,
"special": false,
"text": " time"
},
{
"id": 28725,
"logprob": -0.17541504,
"special": false,
"text": ","
},
{
"id": 736,
"logprob": -0.91308594,
"special": false,
"text": " there"
},
{
"id": 403,
"logprob": -0.058410645,
"special": false,
"text": " was"
},
{
"id": 264,
"logprob": -0.009689331,
"special": false,
"text": " a"
}
],
"top_tokens": null
},
"generated_text": "\n\nOnce upon a time, there was a"
}

View File

@ -0,0 +1,73 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [],
"seed": null,
"tokens": [
{
"id": 187,
"logprob": -0.37890625,
"special": false,
"text": "\n"
},
{
"id": 187,
"logprob": -0.26953125,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.1953125,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.53515625,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.625,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.6796875,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.0,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.3125,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0028533936,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.265625,
"special": false,
"text": " machine"
}
],
"top_tokens": null
},
"generated_text": "\n\nDeep learning is a new type of machine"
}

View File

@ -0,0 +1,99 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 2502,
"logprob": null,
"text": " red"
},
{
"id": 13,
"logprob": -2.734375,
"text": ","
},
{
"id": 8862,
"logprob": -3.6875,
"text": " yellow"
},
{
"id": 13,
"logprob": -0.40234375,
"text": ","
},
{
"id": 209,
"logprob": -8.25,
"text": " "
}
],
"seed": 0,
"tokens": [
{
"id": 187,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 395,
"logprob": -0.3125,
"special": false,
"text": "and"
},
{
"id": 4797,
"logprob": 0.0,
"special": false,
"text": " blue"
},
{
"id": 9830,
"logprob": -1.65625,
"special": false,
"text": " colors"
},
{
"id": 15,
"logprob": 0.0,
"special": false,
"text": "."
},
{
"id": 329,
"logprob": -2.4375,
"special": false,
"text": " A"
},
{
"id": 1180,
"logprob": -1.953125,
"special": false,
"text": " number"
},
{
"id": 273,
"logprob": 0.0,
"special": false,
"text": " of"
},
{
"id": 1027,
"logprob": -1.5546875,
"special": false,
"text": " different"
},
{
"id": 3295,
"logprob": -0.97265625,
"special": false,
"text": " color"
}
],
"top_tokens": null
},
"generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
}

View File

@ -0,0 +1,398 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1276,
"logprob": null,
"text": "What"
},
{
"id": 310,
"logprob": -0.83984375,
"text": " is"
},
{
"id": 18147,
"logprob": -12.8125,
"text": " Deep"
},
{
"id": 20727,
"logprob": -2.84375,
"text": " Learning"
},
{
"id": 32,
"logprob": -1.25,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 187,
"logprob": -0.37890625,
"special": false,
"text": "\n"
},
{
"id": 187,
"logprob": -0.4296875,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.078125,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.515625,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.6015625,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.65625,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.109375,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.328125,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0032653809,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.28125,
"special": false,
"text": " machine"
}
],
"top_tokens": null
},
"generated_text": "\n\nDeep learning is a new type of machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1276,
"logprob": null,
"text": "What"
},
{
"id": 310,
"logprob": -0.80078125,
"text": " is"
},
{
"id": 18147,
"logprob": -13.25,
"text": " Deep"
},
{
"id": 20727,
"logprob": -2.828125,
"text": " Learning"
},
{
"id": 32,
"logprob": -1.1953125,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 187,
"logprob": -0.296875,
"special": false,
"text": "\n"
},
{
"id": 187,
"logprob": -0.3359375,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.2578125,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.5546875,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.62890625,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.64453125,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.078125,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.28125,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0030670166,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.3125,
"special": false,
"text": " machine"
}
],
"top_tokens": null
},
"generated_text": "\n\nDeep learning is a new type of machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1276,
"logprob": null,
"text": "What"
},
{
"id": 310,
"logprob": -0.80078125,
"text": " is"
},
{
"id": 18147,
"logprob": -13.25,
"text": " Deep"
},
{
"id": 20727,
"logprob": -2.828125,
"text": " Learning"
},
{
"id": 32,
"logprob": -1.1953125,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 187,
"logprob": -0.296875,
"special": false,
"text": "\n"
},
{
"id": 187,
"logprob": -0.3359375,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.2578125,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.5546875,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.62890625,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.64453125,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.078125,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.28125,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0030670166,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.3125,
"special": false,
"text": " machine"
}
],
"top_tokens": null
},
"generated_text": "\n\nDeep learning is a new type of machine"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1276,
"logprob": null,
"text": "What"
},
{
"id": 310,
"logprob": -0.80078125,
"text": " is"
},
{
"id": 18147,
"logprob": -13.25,
"text": " Deep"
},
{
"id": 20727,
"logprob": -2.828125,
"text": " Learning"
},
{
"id": 32,
"logprob": -1.1953125,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 187,
"logprob": -0.296875,
"special": false,
"text": "\n"
},
{
"id": 187,
"logprob": -0.3359375,
"special": false,
"text": "\n"
},
{
"id": 30763,
"logprob": -1.2578125,
"special": false,
"text": "Deep"
},
{
"id": 4715,
"logprob": -0.5546875,
"special": false,
"text": " learning"
},
{
"id": 310,
"logprob": -0.62890625,
"special": false,
"text": " is"
},
{
"id": 247,
"logprob": -0.64453125,
"special": false,
"text": " a"
},
{
"id": 747,
"logprob": -2.078125,
"special": false,
"text": " new"
},
{
"id": 1511,
"logprob": -2.28125,
"special": false,
"text": " type"
},
{
"id": 273,
"logprob": -0.0030670166,
"special": false,
"text": " of"
},
{
"id": 5145,
"logprob": -1.3125,
"special": false,
"text": " machine"
}
],
"top_tokens": null
},
"generated_text": "\n\nDeep learning is a new type of machine"
}
]

View File

@ -1,8 +1,8 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 9,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 0,
@ -14,7 +14,7 @@
"tokens": [
{
"id": 16017,
"logprob": -0.30908203,
"logprob": 0.0,
"special": false,
"text": " blue"
},
@ -26,39 +26,45 @@
},
{
"id": 259,
"logprob": -0.28271484,
"logprob": -0.4716797,
"special": false,
"text": " "
},
{
"id": 15484,
"logprob": -1.7929688,
"id": 261,
"logprob": -0.044677734,
"special": false,
"text": "appear"
"text": ","
},
{
"id": 345,
"logprob": -0.8935547,
"id": 35622,
"logprob": -0.79589844,
"special": false,
"text": "ed"
"text": " cloud"
},
{
"id": 281,
"id": 263,
"logprob": -1.2958984,
"special": false,
"text": "s"
},
{
"id": 305,
"logprob": 0.0,
"special": false,
"text": " in"
"text": " and"
},
{
"id": 287,
"id": 35622,
"logprob": -1.1630859,
"special": false,
"text": " cloud"
},
{
"id": 263,
"logprob": 0.0,
"special": false,
"text": " the"
},
{
"id": 20495,
"logprob": -0.32299805,
"special": false,
"text": " sky"
"text": "s"
},
{
"id": 1,
@ -66,7 +72,8 @@
"special": true,
"text": "</s>"
}
]
],
"top_tokens": null
},
"generated_text": "Why is the sky blue?blue sky appeared in the sky"
"generated_text": "Why is the sky blue?blue sky, clouds and clouds"
}

View File

@ -0,0 +1,26 @@
{
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"message": {
"content": "As of today, there is a Update available for the Brooklyn, New York, area. According to the latest forecast, it's warm with high temperatures throughout the day. It's forecasted at 75°F for today and 77°F for tomorrow. However, in autumn, the weather typically changes drastically, becoming cooler and wetter. You can find the current weather forecast for the area through your local weather service. Additionally",
"name": null,
"role": "assistant",
"tool_calls": null
},
"usage": null
}
],
"created": 1710795556,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 60,
"total_tokens": 160
}
}

View File

@ -0,0 +1,40 @@
{
"choices": [
{
"finish_reason": "eos_token",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"name": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"description": null,
"name": "tools",
"parameters": {
"format": "celsius",
"location": "New York, NY",
"num_days": 14
}
},
"id": 0,
"type": "function"
}
]
},
"usage": null
}
],
"created": 1710795556,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 316,
"total_tokens": 345
}
}

View File

@ -0,0 +1,40 @@
{
"choices": [
{
"finish_reason": "eos_token",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"name": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"description": null,
"name": "tools",
"parameters": {
"format": "celsius",
"location": "New York, NY",
"num_days": 14
}
},
"id": 0,
"type": "function"
}
]
},
"usage": null
}
],
"created": 1710795557,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 316,
"total_tokens": 345
}
}

View File

@ -0,0 +1,39 @@
{
"choices": [
{
"finish_reason": "eos_token",
"index": 0,
"logprobs": null,
"message": {
"content": null,
"name": null,
"role": "assistant",
"tool_calls": [
{
"function": {
"description": null,
"name": "tools",
"parameters": {
"format": "celsius",
"location": "New York, NY"
}
},
"id": 0,
"type": "function"
}
]
},
"usage": null
}
],
"created": 1710795557,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 21,
"prompt_tokens": 187,
"total_tokens": 208
}
}

View File

@ -0,0 +1,27 @@
{
"choices": [
{
"delta": {
"content": null,
"role": "assistant",
"tool_calls": {
"function": {
"arguments": "</s>",
"name": null
},
"id": "",
"index": 0,
"type": "function"
}
},
"finish_reason": "eos_token",
"index": 0,
"logprobs": null
}
],
"created": 1710795499,
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "2.0.0-native"
}

View File

@ -18,7 +18,6 @@ async def flash_llama_awq(flash_llama_awq_handle):
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate(
"What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
@ -33,7 +32,6 @@ async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
response = await flash_llama_awq.generate(
"What is Deep Learning?",
@ -55,7 +53,6 @@ async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load(flash_llama_awq, generate_load, response_snapshot):
responses = await generate_load(
flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4

View File

@ -18,7 +18,6 @@ async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
response = await flash_llama_awq_sharded.generate(
"What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
@ -33,7 +32,6 @@ async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapsho
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_awq_load_sharded(
flash_llama_awq_sharded, generate_load, response_snapshot
):

View File

@ -0,0 +1,61 @@
import pytest


@pytest.fixture(scope="module")
def flash_gemma_handle(launcher):
    with launcher("gg-hf/gemma-2b", num_shard=1) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_gemma(flash_gemma_handle):
    await flash_gemma_handle.health(300)
    return flash_gemma_handle.client


@pytest.mark.skip
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma(flash_gemma, response_snapshot):
    response = await flash_gemma.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot


@pytest.mark.skip
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
    response = await flash_gemma.generate(
        "Test request",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot


@pytest.mark.skip
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):
    responses = await generate_load(flash_gemma, "Test request", max_new_tokens=10, n=4)

    assert len(responses) == 4
    assert all([r.generated_text == responses[0].generated_text for r in responses])

    assert responses == response_snapshot
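
The test_flash_gemma_load case above depends on a generate_load helper fixture whose implementation is not part of this diff. Below is a minimal sketch of what such a helper has to do, assuming only that the client exposes the async generate coroutine used in the tests above; the name generate_load_sketch and its signature are illustrative, not the repository's actual conftest code.

import asyncio
from typing import Any, List


async def generate_load_sketch(
    client: Any, prompt: str, max_new_tokens: int, n: int
) -> List[Any]:
    # Fire n identical generation requests concurrently and return the
    # responses in request order; each response is whatever the client's
    # async generate coroutine yields (assumed interface).
    requests = [
        client.generate(prompt, max_new_tokens=max_new_tokens) for _ in range(n)
    ]
    return await asyncio.gather(*requests)

The final assertions in test_flash_gemma_load then only need to check that all n responses decode to the same text and match the stored snapshot.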

Some files were not shown because too many files have changed in this diff.