diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 12c93b9e..24ac3cbe 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -5,14 +5,14 @@ body: id: system-info attributes: label: System Info - description: | + description: | Please share your system info with us (`text-generation-launcher --env` if installed locally). - The full command line used that causes issues: + The full command line used that causes issues: OS version: Rust version (if self-compiling, `cargo version`): Model being used (`curl 127.0.0.1:8080/info | jq`): If local model please explicit the kind of model and/or equivalents. - Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`): + Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`): Deployment specificities (Kubernetes, EKS, AKS, any particular deployments): The current version being used: @@ -52,11 +52,11 @@ body: placeholder: | Steps to reproduce the behavior: - + 1. 2. 3. - + - type: textarea id: expected-behavior diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml index 5abc1565..f1a9135c 100644 --- a/.github/ISSUE_TEMPLATE/feature-request.yml +++ b/.github/ISSUE_TEMPLATE/feature-request.yml @@ -19,7 +19,7 @@ body: label: Motivation description: | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. - + - type: textarea id: contribution diff --git a/.github/workflows/autodocs.yml b/.github/workflows/autodocs.yml index a981c09c..7c5c6eca 100644 --- a/.github/workflows/autodocs.yml +++ b/.github/workflows/autodocs.yml @@ -6,15 +6,15 @@ on: jobs: update_docs: runs-on: ubuntu-latest - + steps: - name: Checkout code uses: actions/checkout@v2 - + - name: Install Launcher id: install-launcher run: cargo install --git https://github.com/${{ github.repository }} --branch ${{ github.head_ref }} text-generation-launcher - + - name: Check launcher Docs are up-to-date run: | echo text-generation-launcher --help diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 395a0b6a..066ea889 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -146,11 +146,50 @@ jobs: cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min + integration-tests: + concurrency: + group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + needs: + - start-runner + - build-and-push-image # Wait for the docker image to be built + runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + env: + DOCKER_VOLUME: /cache + steps: + - uses: actions/checkout@v2 + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4.4.1 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: 3.9 + - name: Tailscale + uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 + with: + authkey: ${{ secrets.TAILSCALE_AUTHKEY }} + - name: Prepare disks + run: | + sudo mkfs -t ext4 /dev/nvme1n1 + sudo mkdir ${{ env.DOCKER_VOLUME }} + sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }} + - name: Install + run: | + make install-integration-tests + - name: Run tests + run: | + export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }} + export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} + pytest -s -vv integration-tests + build-and-push-image-rocm: concurrency: group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }} cancel-in-progress: true - needs: start-runner # required to start the main job when the runner is ready + needs: + - start-runner + - build-and-push-image # Wait for the main docker image to be built + - integration-tests # Wait for the main integration-tests runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner permissions: contents: write @@ -235,43 +274,6 @@ jobs: cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min - integration-tests: - concurrency: - group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - needs: - - start-runner - - build-and-push-image # Wait for the docker image to be built - - build-and-push-image-rocm - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner - env: - DOCKER_VOLUME: /cache - steps: - - uses: actions/checkout@v2 - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: 3.9 - - name: Tailscale - uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 - with: - authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - - name: Prepare disks - run: | - sudo mkfs -t ext4 /dev/nvme1n1 - sudo mkdir ${{ env.DOCKER_VOLUME }} - sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }} - - name: Install - run: | - make install-integration-tests - - name: Run tests - run: | - export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }} - export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} - pytest -s -vv integration-tests - stop-runner: name: Stop self-hosted EC2 runner needs: diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index b46216ec..a5ce39a5 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -16,4 +16,4 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: text-generation-inference - additional_args: --not_python_module + additional_args: --not_python_module diff --git a/.github/workflows/delete_doc_comment.yml b/.github/workflows/delete_doc_comment.yml deleted file mode 100644 index 1cad807b..00000000 --- a/.github/workflows/delete_doc_comment.yml +++ /dev/null @@ -1,12 +0,0 @@ -name: Delete doc comment - -on: - pull_request: - types: [ closed ] - - -jobs: - delete: - uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main - with: - pr_number: ${{ github.event.number }} \ No newline at end of file diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml new file mode 100644 index 00000000..a5e50a79 --- /dev/null +++ b/.github/workflows/stale.yml @@ -0,0 +1,14 @@ +name: 'Close stale issues and PRs' +on: + schedule: + - cron: '30 1 * * *' + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v8 + with: + stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' + days-before-stale: 30 + days-before-close: 5 diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 311ee6b9..29ff6d45 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -33,11 +33,18 @@ jobs: - name: Install Rust uses: actions-rs/toolchain@v1 with: - toolchain: 1.71.0 + # Released on: 28 December, 2023 + # Branched from master on: 10 November, 2023 + # https://releases.rs/docs/1.75.0/ + toolchain: 1.75.0 override: true components: rustfmt, clippy - name: Install Protoc uses: arduino/setup-protoc@v1 + - name: Clean unused files + run: | + sudo rm -rf /usr/local/lib/android # will release about 10 GB if you don't need Android + sudo rm -rf /usr/share/dotnet # will release about 20GB if you don't need .NET - name: Install sccache run: | curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache @@ -68,12 +75,11 @@ jobs: pip install pytest export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }} pytest -s -vv server/tests - - name: Run Rust fmt + - name: Pre-commit checks run: | - cargo fmt --check - - name: Run Rust clippy - run: | - cargo clippy + pip install pre-commit + pre-commit install + pre-commit run --all-files - name: Run Rust tests run: | cargo test diff --git a/.github/workflows/upload_pr_documentation.yml b/.github/workflows/upload_pr_documentation.yml index b984ead2..ae00bb51 100644 --- a/.github/workflows/upload_pr_documentation.yml +++ b/.github/workflows/upload_pr_documentation.yml @@ -13,4 +13,4 @@ jobs: package_name: text-generation-inference secrets: hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} - comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} \ No newline at end of file + comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} diff --git a/.gitignore b/.gitignore index 20c9baee..b3ca772b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,12 @@ target router/tokenizer.json *__pycache__* + +# ROCm auto-generated files +*.hip +server/exllamav2_kernels/exllamav2_kernels/hip/ +server/exllama_kernels/exllama_kernels/hip/ +server/exllama_kernels/exllama_kernels/hip_func/ +*_hip.cuh +server/exllama_kernels/exllama_kernels/hip_buffers.cuh +server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..45bc07a5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: docs/source/basic_tutorials/launcher.md +- repo: https://github.com/psf/black + rev: 24.2.0 + hooks: + - id: black +- repo: https://github.com/doublify/pre-commit-rust + rev: v1.0 + hooks: + - id: fmt + - id: cargo-check + - id: clippy diff --git a/Cargo.lock b/Cargo.lock index 04d42397..e5f47197 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,7 +24,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", + "getrandom", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -38,6 +40,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aligned-vec" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" + [[package]] name = "anstream" version = "0.6.13" @@ -88,9 +96,15 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" + +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "arc-swap" @@ -98,6 +112,23 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" +[[package]] +name = "arg_enum_proc_macro" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + [[package]] name = "async-rustls" version = "0.3.0" @@ -128,18 +159,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -148,6 +179,20 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" +[[package]] +name = "av1-grain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf" +dependencies = [ + "anyhow", + "arrayvec", + "log", + "nom", + "num-rational", + "v_frame", +] + [[package]] name = "average" version = "0.14.2" @@ -159,6 +204,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "avif-serialize" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2" +dependencies = [ + "arrayvec", +] + [[package]] name = "awaitdrop" version = "0.1.2" @@ -265,6 +319,33 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bit_field" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61" + [[package]] name = "bitflags" version = "1.3.2" @@ -277,6 +358,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "bitstream-io" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06c9989a51171e2e81038ab168b6ae22886fe9ded214430dbb4f41c28cf176da" + [[package]] name = "block-buffer" version = "0.10.4" @@ -287,16 +374,28 @@ dependencies = [ ] [[package]] -name = "bumpalo" -version = "3.15.4" +name = "built" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" +checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16" + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytecount" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + +[[package]] +name = "bytemuck" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15" [[package]] name = "byteorder" @@ -304,6 +403,12 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" +[[package]] +name = "byteorder-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" + [[package]] name = "bytes" version = "1.6.0" @@ -350,9 +455,24 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" [[package]] name = "cc" -version = "1.0.90" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b" +dependencies = [ + "jobserver", + "libc", + "once_cell", +] + +[[package]] +name = "cfg-expr" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02" +dependencies = [ + "smallvec", + "target-lexicon", +] [[package]] name = "cfg-if" @@ -397,7 +517,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -406,6 +526,12 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +[[package]] +name = "color_quant" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b" + [[package]] name = "colorchoice" version = "1.0.0" @@ -518,6 +644,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + [[package]] name = "crypto-common" version = "0.1.6" @@ -675,9 +807,9 @@ dependencies = [ [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "encode_unicode" @@ -687,9 +819,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" [[package]] name = "encoding_rs" -version = "0.8.33" +version = "0.8.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" +checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59" dependencies = [ "cfg-if", ] @@ -719,12 +851,47 @@ dependencies = [ "cc", ] +[[package]] +name = "exr" +version = "1.72.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4" +dependencies = [ + "bit_field", + "flume", + "half", + "lebe", + "miniz_oxide", + "rayon-core", + "smallvec", + "zune-inflate", +] + +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fastrand" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" +[[package]] +name = "fdeflate" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645" +dependencies = [ + "simd-adler32", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -753,6 +920,15 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28a80e3145d8ad11ba0995949bbcf48b9df2be62772b3d351ef017dff6ecb853" +[[package]] +name = "flume" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +dependencies = [ + "spin 0.9.8", +] + [[package]] name = "fnv" version = "1.0.7" @@ -783,6 +959,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "futures" version = "0.3.30" @@ -839,7 +1025,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -893,13 +1079,25 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", +] + +[[package]] +name = "gif" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2" +dependencies = [ + "color_quant", + "weezl", ] [[package]] @@ -920,9 +1118,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -937,6 +1135,16 @@ dependencies = [ "tracing", ] +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -983,25 +1191,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732" dependencies = [ "dirs 5.0.1", + "futures", "indicatif", "log", "native-tls", + "num_cpus", "rand", + "reqwest", "serde", "serde_json", "thiserror", + "tokio", "ureq", ] -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "hostname" version = "0.3.1" @@ -1118,6 +1321,45 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "image" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11" +dependencies = [ + "bytemuck", + "byteorder", + "color_quant", + "exr", + "gif", + "image-webp", + "num-traits", + "png", + "qoi", + "ravif", + "rayon", + "rgb", + "tiff", + "zune-core", + "zune-jpeg", +] + +[[package]] +name = "image-webp" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d" +dependencies = [ + "byteorder-lite", + "thiserror", +] + +[[package]] +name = "imgref" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126" + [[package]] name = "indexmap" version = "1.9.3" @@ -1180,12 +1422,32 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "interpolate_name" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + [[package]] name = "ipnet" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "iso8601" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "924e5d73ea28f59011fec52a0d12185d496a9b075d360657aed2a5707f701153" +dependencies = [ + "nom", +] + [[package]] name = "itertools" version = "0.10.5" @@ -1204,12 +1466,36 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +[[package]] +name = "jobserver" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +dependencies = [ + "libc", +] + +[[package]] +name = "jpeg-decoder" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" + [[package]] name = "js-sys" version = "0.3.69" @@ -1219,18 +1505,65 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" +dependencies = [ + "ahash", + "anyhow", + "base64 0.21.7", + "bytecount", + "clap", + "fancy-regex", + "fraction", + "getrandom", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot", + "percent-encoding", + "regex", + "reqwest", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lebe" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8" + [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "libfuzzer-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + [[package]] name = "libm" version = "0.2.8" @@ -1269,6 +1602,15 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "loop9" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062" +dependencies = [ + "imgref", +] + [[package]] name = "mach2" version = "0.4.2" @@ -1315,6 +1657,16 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "maybe-rayon" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519" +dependencies = [ + "cfg-if", + "rayon", +] + [[package]] name = "memchr" version = "2.7.2" @@ -1358,7 +1710,7 @@ checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1392,6 +1744,14 @@ dependencies = [ "unicase", ] +[[package]] +name = "minijinja" +version = "1.0.12" +source = "git+https://github.com/mitsuhiko/minijinja.git?rev=5cd4efb#5cd4efb9e2639247df275fe6e22a5dbe0ce71b28" +dependencies = [ + "serde", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1405,6 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", + "simd-adler32", ] [[package]] @@ -1421,9 +1782,9 @@ dependencies = [ [[package]] name = "monostate" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "878c2a1f1c70e5724fa28f101ca787b6a7e8ad5c5e4ae4ca3b0fa4a419fa9075" +checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf" dependencies = [ "monostate-impl", "serde", @@ -1431,20 +1792,20 @@ dependencies = [ [[package]] name = "monostate-impl" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce" +checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "multimap" -version = "0.8.3" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" [[package]] name = "muxado" @@ -1483,6 +1844,12 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + [[package]] name = "ngrok" version = "0.13.1" @@ -1542,6 +1909,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "noop_proc_macro" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8" + [[package]] name = "ntapi" version = "0.4.1" @@ -1561,12 +1934,95 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.18" @@ -1662,7 +2118,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1878,7 +2334,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1899,6 +2355,19 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "png" +version = "0.17.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1" +dependencies = [ + "bitflags 1.3.2", + "crc32fast", + "fdeflate", + "flate2", + "miniz_oxide", +] + [[package]] name = "portable-atomic" version = "1.6.0" @@ -1919,12 +2388,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "prettyplease" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" dependencies = [ "proc-macro2", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1953,13 +2422,32 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58" +dependencies = [ + "profiling-procmacros", +] + +[[package]] +name = "profiling-procmacros" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" +dependencies = [ + "quote", + "syn 2.0.60", +] + [[package]] name = "prost" version = "0.11.9" @@ -1972,34 +2460,33 @@ dependencies = [ [[package]] name = "prost" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922" dependencies = [ "bytes", - "prost-derive 0.12.3", + "prost-derive 0.12.4", ] [[package]] name = "prost-build" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2" +checksum = "80b776a1b2dc779f5ee0641f8ade0125bc1298dd41a9a0c16d8bd57b42d222b1" dependencies = [ "bytes", - "heck 0.4.1", - "itertools 0.11.0", + "heck 0.5.0", + "itertools 0.12.1", "log", "multimap", "once_cell", "petgraph", "prettyplease", - "prost 0.12.3", + "prost 0.12.4", "prost-types", "regex", - "syn 2.0.58", + "syn 2.0.60", "tempfile", - "which", ] [[package]] @@ -2017,24 +2504,33 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48" dependencies = [ "anyhow", - "itertools 0.11.0", + "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "prost-types" -version = "0.12.3" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e" +checksum = "3235c33eb02c1f1e212abdbe34c78b264b038fb58ca612664343271e36e55ffe" dependencies = [ - "prost 0.12.3", + "prost 0.12.4", +] + +[[package]] +name = "qoi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001" +dependencies = [ + "bytemuck", ] [[package]] @@ -2054,10 +2550,16 @@ dependencies = [ ] [[package]] -name = "quote" -version = "1.0.35" +name = "quick-error" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -2109,6 +2611,56 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "rav1e" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9" +dependencies = [ + "arbitrary", + "arg_enum_proc_macro", + "arrayvec", + "av1-grain", + "bitstream-io", + "built", + "cfg-if", + "interpolate_name", + "itertools 0.12.1", + "libc", + "libfuzzer-sys", + "log", + "maybe-rayon", + "new_debug_unreachable", + "noop_proc_macro", + "num-derive", + "num-traits", + "once_cell", + "paste", + "profiling", + "rand", + "rand_chacha", + "simd_helpers", + "system-deps", + "thiserror", + "v_frame", + "wasm-bindgen", +] + +[[package]] +name = "ravif" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234" +dependencies = [ + "avif-serialize", + "imgref", + "loop9", + "quick-error", + "rav1e", + "rayon", + "rgb", +] + [[package]] name = "raw-cpuid" version = "10.7.0" @@ -2259,6 +2811,15 @@ dependencies = [ "winreg", ] +[[package]] +name = "rgb" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -2310,7 +2871,7 @@ dependencies = [ "quote", "rust-embed-utils", "shellexpand", - "syn 2.0.58", + "syn 2.0.60", "walkdir", ] @@ -2341,9 +2902,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.32" +version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ "bitflags 2.5.0", "errno", @@ -2366,9 +2927,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", "ring 0.17.8", @@ -2389,15 +2950,15 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247" +checksum = "beb461507cee2c2ff151784c52762cf4d9ff6a61f3e80968600ed24fa837fa54" [[package]] name = "rustls-webpki" -version = "0.102.2" +version = "0.102.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" +checksum = "f3bce581c0dd41bce533ce695a1437fa16a7ab5ac3ccfa99fe1a620a7885eabf" dependencies = [ "ring 0.17.8", "rustls-pki-types", @@ -2406,9 +2967,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47" [[package]] name = "ryu" @@ -2484,29 +3045,29 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "itoa", "ryu", @@ -2523,6 +3084,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -2587,13 +3157,28 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.1" +version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "simd_helpers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6" +dependencies = [ + "quote", +] + [[package]] name = "sketches-ddsketch" version = "0.2.2" @@ -2645,6 +3230,9 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] [[package]] name = "spm_precompiled" @@ -2689,7 +3277,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2711,9 +3299,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.58" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -2728,9 +3316,9 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" [[package]] name = "sysinfo" -version = "0.30.8" +version = "0.30.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b1a378e48fb3ce3a5cf04359c456c9c98ff689bcf1c1bc6e6a31f247686f275" +checksum = "87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83" dependencies = [ "cfg-if", "core-foundation-sys", @@ -2761,6 +3349,19 @@ dependencies = [ "libc", ] +[[package]] +name = "system-deps" +version = "6.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349" +dependencies = [ + "cfg-expr", + "heck 0.5.0", + "pkg-config", + "toml", + "version-compare", +] + [[package]] name = "tabled" version = "0.14.0" @@ -2785,6 +3386,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + [[package]] name = "tempfile" version = "3.10.1" @@ -2799,7 +3406,7 @@ dependencies = [ [[package]] name = "text-generation-benchmark" -version = "1.2.0" +version = "2.0.0" dependencies = [ "average", "clap", @@ -2812,7 +3419,7 @@ dependencies = [ "tabled", "text-generation-client", "thiserror", - "tokenizers", + "tokenizers 0.14.1", "tokio", "tracing", "tracing-subscriber", @@ -2820,11 +3427,11 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "1.2.0" +version = "2.0.0" dependencies = [ "futures", "grpc-metadata", - "prost 0.12.3", + "prost 0.12.4", "prost-build", "rand", "thiserror", @@ -2837,12 +3444,14 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "1.2.0" +version = "2.0.0" dependencies = [ "clap", "ctrlc", "float_eq", + "hf-hub", "nix", + "once_cell", "reqwest", "serde", "serde_json", @@ -2853,28 +3462,35 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "1.2.0" +version = "2.0.0" dependencies = [ "async-stream", "axum", "axum-tracing-opentelemetry", + "base64 0.22.0", "clap", "futures", + "futures-util", "hf-hub", + "image", "init-tracing-opentelemetry", + "jsonschema", "metrics", "metrics-exporter-prometheus", + "minijinja", "ngrok", "nohash-hasher", + "once_cell", "opentelemetry", "opentelemetry-otlp", "rand", + "regex", "reqwest", "serde", "serde_json", "text-generation-client", "thiserror", - "tokenizers", + "tokenizers 0.15.2", "tokio", "tokio-stream", "tower-http", @@ -2888,22 +3504,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.58" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2917,10 +3533,21 @@ dependencies = [ ] [[package]] -name = "time" -version = "0.3.34" +name = "tiff" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" +checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e" +dependencies = [ + "flate2", + "jpeg-decoder", + "weezl", +] + +[[package]] +name = "time" +version = "0.3.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" dependencies = [ "deranged", "itoa", @@ -2941,9 +3568,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" +checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" dependencies = [ "num-conv", "time-core", @@ -2998,6 +3625,40 @@ dependencies = [ "unicode_categories", ] +[[package]] +name = "tokenizers" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d" +dependencies = [ + "aho-corasick", + "clap", + "derive_builder", + "esaxx-rs", + "getrandom", + "hf-hub", + "indicatif", + "itertools 0.12.1", + "lazy_static", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand", + "rayon", + "rayon-cond", + "regex", + "regex-syntax 0.8.3", + "serde", + "serde_json", + "spm_precompiled", + "thiserror", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.37.0" @@ -3035,7 +3696,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3085,6 +3746,40 @@ dependencies = [ "tracing", ] +[[package]] +name = "toml" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef" +dependencies = [ + "indexmap 2.2.6", + "serde", + "serde_spanned", + "toml_datetime", + "winnow", +] + [[package]] name = "tonic" version = "0.9.2" @@ -3131,7 +3826,7 @@ dependencies = [ "hyper-timeout", "percent-encoding", "pin-project", - "prost 0.12.3", + "prost 0.12.4", "tokio", "tokio-stream", "tower", @@ -3150,7 +3845,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3223,7 +3918,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3401,16 +4096,16 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.9.6" +version = "2.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" +checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" dependencies = [ - "base64 0.21.7", + "base64 0.22.0", "flate2", "log", "native-tls", "once_cell", - "rustls 0.22.3", + "rustls 0.22.4", "rustls-pki-types", "rustls-webpki", "serde", @@ -3464,7 +4159,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3483,6 +4178,23 @@ dependencies = [ "zip", ] +[[package]] +name = "uuid" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" + +[[package]] +name = "v_frame" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b" +dependencies = [ + "aligned-vec", + "num-traits", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.0" @@ -3511,6 +4223,12 @@ dependencies = [ "time", ] +[[package]] +name = "version-compare" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" + [[package]] name = "version_check" version = "0.9.4" @@ -3563,7 +4281,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-shared", ] @@ -3597,7 +4315,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3638,16 +4356,10 @@ dependencies = [ ] [[package]] -name = "which" -version = "4.4.2" +name = "weezl" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] +checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" [[package]] name = "winapi" @@ -3667,11 +4379,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" dependencies = [ - "winapi", + "windows-sys 0.52.0", ] [[package]] @@ -3687,7 +4399,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3696,7 +4408,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3723,7 +4435,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.4", + "windows-targets 0.52.5", ] [[package]] @@ -3758,17 +4470,18 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" dependencies = [ - "windows_aarch64_gnullvm 0.52.4", - "windows_aarch64_msvc 0.52.4", - "windows_i686_gnu 0.52.4", - "windows_i686_msvc 0.52.4", - "windows_x86_64_gnu 0.52.4", - "windows_x86_64_gnullvm 0.52.4", - "windows_x86_64_msvc 0.52.4", + "windows_aarch64_gnullvm 0.52.5", + "windows_aarch64_msvc 0.52.5", + "windows_i686_gnu 0.52.5", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.5", + "windows_x86_64_gnu 0.52.5", + "windows_x86_64_gnullvm 0.52.5", + "windows_x86_64_msvc 0.52.5", ] [[package]] @@ -3785,9 +4498,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" [[package]] name = "windows_aarch64_msvc" @@ -3803,9 +4516,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" [[package]] name = "windows_i686_gnu" @@ -3821,9 +4534,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" [[package]] name = "windows_i686_msvc" @@ -3839,9 +4558,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" [[package]] name = "windows_x86_64_gnu" @@ -3857,9 +4576,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" [[package]] name = "windows_x86_64_gnullvm" @@ -3875,9 +4594,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" [[package]] name = "windows_x86_64_msvc" @@ -3893,9 +4612,18 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.4" +version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" + +[[package]] +name = "winnow" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352" +dependencies = [ + "memchr", +] [[package]] name = "winreg" @@ -3924,7 +4652,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3944,3 +4672,27 @@ dependencies = [ "crossbeam-utils", "flate2", ] + +[[package]] +name = "zune-core" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" + +[[package]] +name = "zune-inflate" +version = "0.2.54" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02" +dependencies = [ + "simd-adler32", +] + +[[package]] +name = "zune-jpeg" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448" +dependencies = [ + "zune-core", +] diff --git a/Cargo.toml b/Cargo.toml index 3677d2b6..ecb4878f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ resolver = "2" [workspace.package] -version = "1.2.0" +version = "2.0.0" edition = "2021" authors = ["Olivier Dehaene"] homepage = "https://github.com/huggingface/text-generation-inference" @@ -17,5 +17,7 @@ homepage = "https://github.com/huggingface/text-generation-inference" [profile.release] debug = 1 incremental = true -lto = "off" +lto = "fat" +opt-level = 3 +codegen-units = 1 panic = "abort" diff --git a/Dockerfile b/Dockerfile index 481bfb2a..c7c65769 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,7 +31,7 @@ COPY launcher launcher RUN cargo build --release # Text Generation Inference base image -FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest as base +FROM vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest as base # Text Generation Inference base env ENV HUGGINGFACE_HUB_CACHE=/data \ @@ -58,8 +58,8 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements.txt && \ - bash ./dill-0.3.7-patch.sh && \ - pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 && \ + bash ./dill-0.3.8-patch.sh && \ + pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.0 && \ pip install . --no-cache-dir # Install benchmarker @@ -72,5 +72,7 @@ COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/ # Final image FROM base -ENTRYPOINT ["text-generation-launcher"] +COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh + +ENTRYPOINT ["/tgi-entrypoint.sh"] CMD ["--json-output"] diff --git a/Dockerfile_amd b/Dockerfile_amd index dd331a5d..fb820116 100644 --- a/Dockerfile_amd +++ b/Dockerfile_amd @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse @@ -36,7 +36,7 @@ COPY launcher launcher RUN cargo build --release # Text Generation Inference base image for RoCm -FROM rocm/dev-ubuntu-20.04:5.7 as base +FROM rocm/dev-ubuntu-22.04:5.7 as base RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ build-essential \ @@ -75,8 +75,8 @@ RUN chmod +x ~/mambaforge.sh && \ mamba init && \ rm ~/mambaforge.sh -# Install PyTorch nightly (2.2.0.dev2023) compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6. -RUN pip install --pre torch==2.2.0.dev20231106 --index-url https://download.pytorch.org/whl/nightly/rocm5.7 +# Install PyTorch 2.2 RC compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6. +RUN pip install torch --index-url https://download.pytorch.org/whl/test/rocm5.7/ FROM base AS kernel-builder @@ -104,6 +104,20 @@ WORKDIR /usr/src COPY server/custom_kernels/ . RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build +# Build exllama kernels +FROM kernel-builder as exllama-kernels-builder +WORKDIR /usr/src +COPY server/exllama_kernels/ . + +RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build + +# Build exllama v2 kernels +FROM kernel-builder as exllamav2-kernels-builder +WORKDIR /usr/src +COPY server/exllamav2_kernels/ . + +RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build + FROM base as base-copy # Text Generation Inference base env @@ -120,6 +134,12 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86 # Copy build artifacts from custom kernels builder COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages +# Copy build artifacts from exllama kernels builder +COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages + +# Copy build artifacts from exllamav2 kernels builder +COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages + # Install flash-attention dependencies RUN pip install einops --no-cache-dir @@ -130,7 +150,7 @@ COPY server/Makefile server/Makefile RUN cd server && \ make gen-server && \ pip install -r requirements_rocm.txt && \ - pip install ".[accelerate, peft]" --no-cache-dir + pip install ".[accelerate, peft, outlines]" --no-cache-dir # Install benchmarker COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark diff --git a/LICENSE b/LICENSE index 19a34fcf..7d0e8034 100644 --- a/LICENSE +++ b/LICENSE @@ -1,181 +1,201 @@ -Hugging Face Optimized Inference License 1.0 (HFOILv1.0) + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -This License Agreement governs the use of the Software and its Modifications. It is a -binding agreement between the Licensor and You. + 1. Definitions. -This License Agreement shall be referred to as Hugging Face Optimized Inference License -1.0 or HFOILv1.0. We may publish revised versions of this License Agreement from time to -time. Each version will be given a distinguished number. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -By downloading, accessing, modifying, distributing or otherwise using the Software, You -consent to all of the terms and conditions below. So, if You do not agree with those, -please do not download, access, modify, distribute, or use the Software. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. -1. PERMISSIONS + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. -You may use, modify and distribute the Software pursuant to the following terms and -conditions: + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. -Copyright License. Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free, copyright license to reproduce, prepare, publicly display, -publicly perform, sublicense under the terms herein, and distribute the Software and -Modifications of the Software. + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. -Patent License. Subject to the terms and conditions of this License Agreement and where -and as applicable, each Contributor hereby grants You a perpetual, worldwide, -non-exclusive, royalty-free patent license to make, have made, Use, offer to sell, sell, -import, and otherwise transfer the Software, where such license applies only to those -patent claims licensable by such Contributor that are necessarily infringed by their -Contribution(s) alone or by combination of their Contribution(s) with the Software to -which such Contribution(s) was submitted. If You institute patent litigation against any -entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Software -or a Contribution incorporated within the Software constitutes direct or contributory -patent infringement, then any rights granted to You under this License Agreement for the -Software shall terminate as of the date such litigation is filed. + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). -No other rights. All rights not expressly granted herein are retained. + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." -2. RESTRICTIONS + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. -You may not distribute the Software as a hosted or managed, and paid service, where the -service grants users access to any substantial set of the features or functionality of the -Software. If you wish to do so, You will need to be granted additional rights from the -Licensor which will be subject to a separate mutually agreed agreement. + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. -You may not sublicense the Software under any other terms than those listed in this -License. + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: -3. OBLIGATIONS + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and -When You modify the Software, You agree to: - attach a notice stating the Modifications of -the Software You made; and - attach a notice stating that the Modifications of the -Software are released under this License Agreement. + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and -When You distribute the Software or Modifications of the Software, You agree to: - give -any recipients of the Software a copy of this License Agreement; - retain all Explanatory -Documentation; and if sharing the Modifications of the Software, add Explanatory -Documentation documenting the changes made to create the Modifications of the Software; - -retain all copyright, patent, trademark and attribution notices. + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. -4. MISCELLANEOUS + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. -Termination. Licensor reserves the right to restrict Use of the Software in violation of -this License Agreement, upon which Your licenses will automatically terminate. + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. -Contributions. Unless You explicitly state otherwise, any Contribution intentionally -submitted for inclusion in the Software by You to the Licensor shall be under the terms -and conditions of this License, without any additional terms or conditions. -Notwithstanding the above, nothing herein shall supersede or modify the terms of any -separate license agreement you may have executed with Licensor regarding such -Contributions. + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. -Trademarks and related. Nothing in this License Agreement permits You (i) to make Use of -Licensors’ trademarks, trade names, or logos, (ii) otherwise suggest endorsement by -Licensor, or (iii) misrepresent the relationship between the parties; and any rights not -expressly granted herein are reserved by the Licensors. + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. -Output You generate. Licensor claims no rights in the Output. You agree not to contravene -any provision as stated in the License Agreement with your Use of the Output. + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. -Disclaimer of Warranty. Except as expressly provided otherwise herein, and to the fullest -extent permitted by law, Licensor provides the Software (and each Contributor provides its -Contributions) AS IS, and Licensor disclaims all warranties or guarantees of any kind, -express or implied, whether arising under any law or from any usage in trade, or otherwise -including but not limited to the implied warranties of merchantability, non-infringement, -quiet enjoyment, fitness for a particular purpose, or otherwise. You are solely -responsible for determining the appropriateness of the Software and Modifications of the -Software for your purposes (including your use or distribution of the Software and -Modifications of the Software), and assume any risks associated with Your exercise of -permissions under this License Agreement. + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. -Limitation of Liability. In no event and under no legal theory, whether in tort (including -negligence), contract, or otherwise, unless required by applicable law (such as deliberate -and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to -You for damages, including any direct, indirect, special, incidental, or consequential -damages of any character arising as a result of this License Agreement or out of the Use -or inability to Use the Software (including but not limited to damages for loss of -goodwill, work stoppage, computer failure or malfunction, model failure or malfunction, or -any and all other commercial damages or losses), even if such Contributor has been advised -of the possibility of such damages. + END OF TERMS AND CONDITIONS -Accepting Warranty or Additional Liability. While sharing the Software or Modifications of -the Software thereof, You may choose to offer and charge a fee for, acceptance of support, -warranty, indemnity, or other liability obligations and/or rights consistent with this -License Agreement. However, in accepting such obligations, You may act only on Your own -behalf and on Your sole responsibility, not on behalf of Licensor or any other -Contributor, and you hereby agree to indemnify, defend, and hold Licensor and each other -Contributor (and their successors or assigns) harmless for any liability incurred by, or -claims asserted against, such Licensor or Contributor (and their successors or assigns) by -reason of your accepting any such warranty or additional liability. + APPENDIX: How to apply the Apache License to your work. -Severability. This License Agreement is a license of copyright and patent rights and an -agreement in contract between You and the Licensor. If any provision of this License -Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall -be unaffected thereby and remain valid as if such provision had not been set forth herein. + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + Copyright 2022 Hugging Face -5. DEFINITIONS + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at -“Contribution” refers to any work of authorship, including the original version of the -Software and any Modifications of the Software that is intentionally submitted to Licensor -for inclusion in the Software by the copyright owner or by an individual or entity -authorized to submit on behalf of the copyright owner. For the purposes of this -definition, “submitted” means any form of electronic, verbal, or written communication -sent to the Licensor or its representatives, including but not limited to communication on -electronic mailing lists, source code control systems, and issue tracking systems that are -managed by, or on behalf of, the Licensor for the purpose of discussing and improving the -Software, but excluding communication that is conspicuously marked or otherwise designated -in writing by the copyright owner as “Not a Contribution.” + http://www.apache.org/licenses/LICENSE-2.0 -“Contributor” refers to Licensor and any individual or entity on behalf of whom a -Contribution has been received by Licensor and subsequently incorporated within the -Software. - -“Data” refers to a collection of information extracted from the dataset used with the -Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not -licensed under this License Agreement. - -“Explanatory Documentation” refers to any documentation or related information including -but not limited to model cards or data cards dedicated to inform the public about the -characteristics of the Software. Explanatory documentation is not licensed under this -License. - -"License Agreement" refers to these terms and conditions. - -“Licensor” refers to the rights owners or entity authorized by the rights owners that are -granting the terms and conditions of this License Agreement. - -“Model” refers to machine-learning based assemblies (including checkpoints), consisting of -learnt weights and parameters (including optimizer states), corresponding to a model -architecture as embodied in Software source code. Source code is not licensed under this -License Agreement. - -“Modifications of the Software” refers to all changes to the Software, including without -limitation derivative works of the Software. - -“Output” refers to the results of operating the Software. - -“Share” refers to any transmission, reproduction, publication or other sharing of the -Software or Modifications of the Software to a third party, including providing the -Softwaire as a hosted service made available by electronic or other remote means, -including - but not limited to - API-based or web access. - -“Software” refers to the software and Model (or parts of either) that Licensor makes -available under this License Agreement. - -“Third Parties” refers to individuals or legal entities that are not under common control -with Licensor or You. - -“Use” refers to anything You or your representatives do with the Software, including but -not limited to generating any Output, fine tuning, updating, running, training, evaluating -and/or reparametrizing the Model. - -"You" (or "Your") refers to an individual or Legal Entity exercising permissions granted -by this License Agreement and/or making Use of the Software for whichever purpose and in -any field of Use. + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 843117d8..56f370a7 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene model=meta-llama/Llama-2-7b-hf volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run - docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model + docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --max-input-length 1024 --max-total-tokens 2048 ``` > For gated models such as [LLama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to pass `-e HUGGING_FACE_HUB_TOKEN=` to the `docker run` command above with a valid Hugging Face Hub read token. @@ -53,7 +53,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene model=meta-llama/Llama-2-70b-hf volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run - docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8 + docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8 --max-input-length 1024 --max-total-tokens 2048 ``` 3. You can then send a simple request: ```bash diff --git a/assets/architecture.jpg b/assets/architecture.jpg deleted file mode 100644 index c4a511c9..00000000 Binary files a/assets/architecture.jpg and /dev/null differ diff --git a/assets/architecture.png b/assets/architecture.png new file mode 100644 index 00000000..1bcd1283 Binary files /dev/null and b/assets/architecture.png differ diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml index 2dd2e64d..40738c4d 100644 --- a/benchmark/Cargo.toml +++ b/benchmark/Cargo.toml @@ -29,4 +29,3 @@ tui = {package = "ratatui", version = "0.23", default-features = false, features tracing = "0.1.37" tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } hf-hub = "0.3.1" - diff --git a/benchmark/README.md b/benchmark/README.md index 7f51a731..17a02a30 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -6,12 +6,12 @@ -A lightweight benchmarking tool based inspired by [oha](https://github.com/hatoo/oha) +A lightweight benchmarking tool based inspired by [oha](https://github.com/hatoo/oha) and powered by [tui](https://github.com/tui-rs-revival/ratatui). -## Install +## Install -```shell +```shell make install-benchmark ``` @@ -27,4 +27,4 @@ Then run the benchmarking tool: ```shell text-generation-benchmark --tokenizer-name bigscience/bloom-560m -``` \ No newline at end of file +``` diff --git a/benchmark/src/app.rs b/benchmark/src/app.rs index 49654c1b..48ac976a 100644 --- a/benchmark/src/app.rs +++ b/benchmark/src/app.rs @@ -444,7 +444,7 @@ fn progress_gauge(title: &str, label: String, progress: f64, color: Color) -> Ga } /// Throughput paragraph -fn throughput_paragraph<'a>(throughput: &Vec, name: &'static str) -> Paragraph<'a> { +fn throughput_paragraph<'a>(throughput: &[f64], name: &'static str) -> Paragraph<'a> { // Throughput average/high/low texts let throughput_texts = statis_spans(throughput, "tokens/secs"); @@ -457,7 +457,7 @@ fn throughput_paragraph<'a>(throughput: &Vec, name: &'static str) -> Paragr } /// Latency paragraph -fn latency_paragraph<'a>(latency: &mut Vec, name: &'static str) -> Paragraph<'a> { +fn latency_paragraph<'a>(latency: &mut [f64], name: &'static str) -> Paragraph<'a> { // Latency average/high/low texts let mut latency_texts = statis_spans(latency, "ms"); @@ -466,7 +466,7 @@ fn latency_paragraph<'a>(latency: &mut Vec, name: &'static str) -> Paragrap let latency_percentiles = crate::utils::percentiles(latency, &[50, 90, 99]); // Latency p50/p90/p99 texts - let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed]; + let colors = [Color::LightGreen, Color::LightYellow, Color::LightRed]; for (i, (name, value)) in latency_percentiles.iter().enumerate() { let span = Line::from(vec![Span::styled( format!("{name}: {value:.2} ms"), @@ -483,7 +483,7 @@ fn latency_paragraph<'a>(latency: &mut Vec, name: &'static str) -> Paragrap } /// Average/High/Low spans -fn statis_spans<'a>(data: &Vec, unit: &'static str) -> Vec> { +fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec> { vec![ Line::from(vec![Span::styled( format!( @@ -543,7 +543,7 @@ fn latency_histogram<'a>( /// Latency/Throughput chart fn latency_throughput_chart<'a>( - latency_throughput: &'a Vec<(f64, f64)>, + latency_throughput: &'a [(f64, f64)], batch_sizes: &'a [u32], zoom: bool, name: &'static str, diff --git a/benchmark/src/generation.rs b/benchmark/src/generation.rs index 67afa04e..ea7c9778 100644 --- a/benchmark/src/generation.rs +++ b/benchmark/src/generation.rs @@ -163,7 +163,7 @@ async fn prefill( // Run prefill let start_time = Instant::now(); - let (_, decode_batch) = client.prefill(batch.clone()).await?; + let (_, decode_batch, _) = client.prefill(batch.clone()).await?; // Get latency let latency = start_time.elapsed(); diff --git a/benchmark/src/lib.rs b/benchmark/src/lib.rs index 1875652c..638c6514 100644 --- a/benchmark/src/lib.rs +++ b/benchmark/src/lib.rs @@ -8,7 +8,7 @@ use crate::app::App; use crate::event::Event; use crossterm::ExecutableCommand; use std::io; -use text_generation_client::{NextTokenChooserParameters, ShardedClient}; +use text_generation_client::{GrammarType, NextTokenChooserParameters, ShardedClient}; use tokenizers::Tokenizer; use tokio::sync::{broadcast, mpsc}; use tui::backend::CrosstermBackend; @@ -30,6 +30,7 @@ pub async fn run( top_p: Option, typical_p: Option, repetition_penalty: Option, + frequency_penalty: Option, watermark: bool, do_sample: bool, client: ShardedClient, @@ -42,7 +43,10 @@ pub async fn run( do_sample, seed: 0, repetition_penalty: repetition_penalty.unwrap_or(1.0), + frequency_penalty: frequency_penalty.unwrap_or(0.0), watermark, + grammar: String::new(), + grammar_type: GrammarType::None as i32, }; // Initialize terminal properties @@ -140,6 +144,7 @@ pub async fn run( top_p, typical_p, repetition_penalty, + frequency_penalty, watermark, do_sample, ); diff --git a/benchmark/src/main.rs b/benchmark/src/main.rs index 97c8af1c..2d89e045 100644 --- a/benchmark/src/main.rs +++ b/benchmark/src/main.rs @@ -84,6 +84,11 @@ struct Args { #[clap(long, env)] repetition_penalty: Option, + /// Generation parameter in case you want to specifically test/debug particular + /// decoding strategies, for full doc refer to the `text-generation-server` + #[clap(long, env)] + frequency_penalty: Option, + /// Generation parameter in case you want to specifically test/debug particular /// decoding strategies, for full doc refer to the `text-generation-server` #[clap(long, env)] @@ -119,6 +124,7 @@ fn main() -> Result<(), Box> { top_p, typical_p, repetition_penalty, + frequency_penalty, watermark, do_sample, master_shard_uds_path, @@ -187,6 +193,7 @@ fn main() -> Result<(), Box> { top_p, typical_p, repetition_penalty, + frequency_penalty, watermark, do_sample, sharded_client, diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs index 9e36717b..e18d7310 100644 --- a/benchmark/src/table.rs +++ b/benchmark/src/table.rs @@ -15,6 +15,7 @@ pub(crate) fn parameters_table( top_p: Option, typical_p: Option, repetition_penalty: Option, + frequency_penalty: Option, watermark: bool, do_sample: bool, ) -> Table { @@ -33,6 +34,7 @@ pub(crate) fn parameters_table( builder.push_record(["Top P", &format!("{top_p:?}")]); builder.push_record(["Typical P", &format!("{typical_p:?}")]); builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]); + builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]); builder.push_record(["Watermark", &watermark.to_string()]); builder.push_record(["Do Sample", &do_sample.to_string()]); @@ -149,7 +151,7 @@ fn add_throuhgputs( } } -fn avg_min_max(data: &Vec) -> (f64, f64, f64) { +fn avg_min_max(data: &[f64]) -> (f64, f64, f64) { let average = data.iter().sum::() / data.len() as f64; let min = data .iter() @@ -162,7 +164,7 @@ fn avg_min_max(data: &Vec) -> (f64, f64, f64) { (average, *min, *max) } -fn px(data: &Vec, p: u32) -> f64 { +fn px(data: &[f64], p: u32) -> f64 { let i = (f64::from(p) / 100.0 * data.len() as f64) as usize; *data.get(i).unwrap_or(&std::f64::NAN) } diff --git a/clients/python/.gitignore b/clients/python/.gitignore index 5758ba92..5a8ecaa7 100644 --- a/clients/python/.gitignore +++ b/clients/python/.gitignore @@ -155,4 +155,4 @@ dmypy.json cython_debug/ transformers -safetensors \ No newline at end of file +safetensors diff --git a/clients/python/Makefile b/clients/python/Makefile index 8b4334bd..42720875 100644 --- a/clients/python/Makefile +++ b/clients/python/Makefile @@ -3,4 +3,4 @@ unit-tests: install: pip install pip --upgrade - pip install -e . \ No newline at end of file + pip install -e . diff --git a/clients/python/README.md b/clients/python/README.md index 82f3ee0c..bf37508e 100644 --- a/clients/python/README.md +++ b/clients/python/README.md @@ -107,7 +107,19 @@ print(text) ### Types ```python -# Request Parameters +# enum for grammar type +class GrammarType(Enum): + Json = "json" + Regex = "regex" + + +# Grammar type and value +class Grammar: + # Grammar type + type: GrammarType + # Grammar value + value: Union[str, dict] + class Parameters: # Activate logits sampling do_sample: bool @@ -116,6 +128,10 @@ class Parameters: # The parameter for repetition penalty. 1.0 means no penalty. # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. repetition_penalty: Optional[float] + # The parameter for frequency penalty. 1.0 means no penalty + # Penalize new tokens based on their existing frequency in the text so far, + # decreasing the model's likelihood to repeat the same line verbatim. + frequency_penalty: Optional[float] # Whether to prepend the prompt to the generated text return_full_text: bool # Stop generating tokens if a member of `stop_sequences` is generated @@ -138,10 +154,22 @@ class Parameters: best_of: Optional[int] # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) watermark: bool + # Get generation details + details: bool # Get decoder input token logprobs and ids decoder_input_details: bool # Return the N most likely tokens at each step - top_n_tokens: Optional[int] + top_n_tokens: Optional[int] + # grammar to use for generation + grammar: Optional[Grammar] + +class Request: + # Prompt + inputs: str + # Generation parameters + parameters: Optional[Parameters] + # Whether to stream output tokens + stream: bool # Decoder input tokens class InputToken: @@ -161,7 +189,7 @@ class Token: # Token text text: str # Logprob - logprob: float + logprob: Optional[float] # Is the token a special token # Can be used to ignore tokens when concatenating special: bool @@ -192,7 +220,7 @@ class BestOfSequence: # Generated tokens tokens: List[Token] # Most likely tokens - top_tokens: Optional[List[List[Token]]] + top_tokens: Optional[List[List[Token]]] # `generate` details @@ -236,7 +264,7 @@ class StreamResponse: # Generated token token: Token # Most likely tokens - top_tokens: Optional[List[Token]] + top_tokens: Optional[List[Token]] # Complete generated text # Only available when the generation is finished generated_text: Optional[str] @@ -248,4 +276,4 @@ class StreamResponse: class DeployedModel: model_id: str sha: str -``` \ No newline at end of file +``` diff --git a/clients/python/poetry.lock b/clients/python/poetry.lock index 2d4e45d2..148d9906 100644 --- a/clients/python/poetry.lock +++ b/clients/python/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -707,18 +707,19 @@ files = [ [[package]] name = "pydantic" -version = "2.4.2" +version = "2.5.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic-2.4.2-py3-none-any.whl", hash = "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1"}, - {file = "pydantic-2.4.2.tar.gz", hash = "sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7"}, + {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"}, + {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"}, ] [package.dependencies] annotated-types = ">=0.4.0" -pydantic-core = "2.10.1" +importlib-metadata = {version = "*", markers = "python_version == \"3.7\""} +pydantic-core = "2.14.6" typing-extensions = ">=4.6.1" [package.extras] @@ -726,117 +727,116 @@ email = ["email-validator (>=2.0.0)"] [[package]] name = "pydantic-core" -version = "2.10.1" +version = "2.14.6" description = "" optional = false python-versions = ">=3.7" files = [ - {file = "pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63"}, - {file = "pydantic_core-2.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e"}, - {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e"}, - {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e"}, - {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6"}, - {file = "pydantic_core-2.10.1-cp310-none-win32.whl", hash = "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b"}, - {file = "pydantic_core-2.10.1-cp310-none-win_amd64.whl", hash = "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0"}, - {file = "pydantic_core-2.10.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea"}, - {file = "pydantic_core-2.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8"}, - {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4"}, - {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607"}, - {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f"}, - {file = "pydantic_core-2.10.1-cp311-none-win32.whl", hash = "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6"}, - {file = "pydantic_core-2.10.1-cp311-none-win_amd64.whl", hash = "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27"}, - {file = "pydantic_core-2.10.1-cp311-none-win_arm64.whl", hash = "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325"}, - {file = "pydantic_core-2.10.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921"}, - {file = "pydantic_core-2.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901"}, - {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d"}, - {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f"}, - {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c"}, - {file = "pydantic_core-2.10.1-cp312-none-win32.whl", hash = "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f"}, - {file = "pydantic_core-2.10.1-cp312-none-win_amd64.whl", hash = "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430"}, - {file = "pydantic_core-2.10.1-cp312-none-win_arm64.whl", hash = "sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15"}, - {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f"}, - {file = "pydantic_core-2.10.1-cp37-none-win32.whl", hash = "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c"}, - {file = "pydantic_core-2.10.1-cp37-none-win_amd64.whl", hash = "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e"}, - {file = "pydantic_core-2.10.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc"}, - {file = "pydantic_core-2.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302"}, - {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e"}, - {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561"}, - {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de"}, - {file = "pydantic_core-2.10.1-cp38-none-win32.whl", hash = "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee"}, - {file = "pydantic_core-2.10.1-cp38-none-win_amd64.whl", hash = "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e"}, - {file = "pydantic_core-2.10.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970"}, - {file = "pydantic_core-2.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a"}, - {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429"}, - {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7"}, - {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595"}, - {file = "pydantic_core-2.10.1-cp39-none-win32.whl", hash = "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a"}, - {file = "pydantic_core-2.10.1-cp39-none-win_amd64.whl", hash = "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357"}, - {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2"}, - {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132"}, - {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7"}, - {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776"}, - {file = "pydantic_core-2.10.1.tar.gz", hash = "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"}, + {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"}, + {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"}, + {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"}, + {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"}, + {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"}, + {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"}, + {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"}, + {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"}, + {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"}, + {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"}, + {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"}, + {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"}, + {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"}, + {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"}, + {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"}, + {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"}, + {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"}, + {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"}, + {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"}, + {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"}, + {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"}, + {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"}, + {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"}, + {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"}, + {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"}, + {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"}, + {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"}, + {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"}, + {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"}, + {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"}, + {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"}, + {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"}, + {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"}, + {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"}, ] [package.dependencies] @@ -928,6 +928,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 4fe6e8b0..2925085b 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "text-generation" -version = "0.6.1" +version = "0.7.0" description = "Hugging Face Text Generation Python Client" license = "Apache-2.0" authors = ["Olivier Dehaene "] @@ -12,7 +12,7 @@ repository = "https://github.com/huggingface/text-generation-inference" [tool.poetry.dependencies] python = "^3.7" -pydantic = "> 1.10, < 3" +pydantic = "> 2, < 3" aiohttp = "^3.8" huggingface-hub = ">= 0.12, < 1.0" diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py index 0bf80f8c..95d23901 100644 --- a/clients/python/text_generation/client.py +++ b/clients/python/text_generation/client.py @@ -3,13 +3,19 @@ import requests from aiohttp import ClientSession, ClientTimeout from pydantic import ValidationError -from typing import Dict, Optional, List, AsyncIterator, Iterator +from typing import Dict, Optional, List, AsyncIterator, Iterator, Union from text_generation.types import ( StreamResponse, Response, Request, Parameters, + Grammar, + ChatRequest, + ChatCompletionChunk, + ChatComplete, + Message, + Tool, ) from text_generation.errors import parse_error @@ -58,6 +64,120 @@ class Client: self.cookies = cookies self.timeout = timeout + def chat( + self, + messages: List[Message], + repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[List[float]] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[float] = None, + stream: bool = False, + seed: Optional[int] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + tools: Optional[List[Tool]] = None, + tool_choice: Optional[str] = None, + ): + """ + Given a list of messages, generate a response asynchronously + + Args: + messages (`List[Message]`): + List of messages + repetition_penalty (`float`): + The parameter for repetition penalty. 0.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 0.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + logit_bias (`List[float]`): + Adjust the likelihood of specified tokens + logprobs (`bool`): + Include log probabilities in the response + top_logprobs (`int`): + Include the `n` most likely tokens at each step + max_tokens (`int`): + Maximum number of generated tokens + n (`int`): + Generate `n` completions + presence_penalty (`float`): + The parameter for presence penalty. 0.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + stream (`bool`): + Stream the response + seed (`int`): + Random sampling seed + temperature (`float`): + The value used to module the logits distribution. + top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation + tools (`List[Tool]`): + List of tools to use + tool_choice (`str`): + The tool to use + + """ + request = ChatRequest( + model="tgi", + messages=messages, + repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stream=stream, + seed=seed, + temperature=temperature, + top_p=top_p, + tools=tools, + tool_choice=tool_choice, + ) + if not stream: + resp = requests.post( + f"{self.base_url}/v1/chat/completions", + json=request.dict(), + headers=self.headers, + cookies=self.cookies, + timeout=self.timeout, + ) + payload = resp.json() + if resp.status_code != 200: + raise parse_error(resp.status_code, payload) + return ChatComplete(**payload) + else: + return self._chat_stream_response(request) + + def _chat_stream_response(self, request): + resp = requests.post( + f"{self.base_url}/v1/chat/completions", + json=request.dict(), + headers=self.headers, + cookies=self.cookies, + timeout=self.timeout, + stream=True, + ) + # iterate and print stream + for byte_payload in resp.iter_lines(): + if byte_payload == b"\n": + continue + payload = byte_payload.decode("utf-8") + if payload.startswith("data:"): + json_payload = json.loads(payload.lstrip("data:").rstrip("\n")) + try: + response = ChatCompletionChunk(**json_payload) + yield response + except ValidationError: + raise parse_error(resp.status, json_payload) + def generate( self, prompt: str, @@ -65,6 +185,7 @@ class Client: max_new_tokens: int = 20, best_of: Optional[int] = None, repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, return_full_text: bool = False, seed: Optional[int] = None, stop_sequences: Optional[List[str]] = None, @@ -76,6 +197,7 @@ class Client: watermark: bool = False, decoder_input_details: bool = False, top_n_tokens: Optional[int] = None, + grammar: Optional[Grammar] = None, ) -> Response: """ Given a prompt, generate the following text @@ -92,6 +214,10 @@ class Client: repetition_penalty (`float`): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 1.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. return_full_text (`bool`): Whether to prepend the prompt to the generated text seed (`int`): @@ -116,6 +242,9 @@ class Client: Return the decoder input token logprobs and ids top_n_tokens (`int`): Return the `n` most likely tokens at each step + grammar (`Grammar`): + Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation + of the text to match a regular expression or JSON schema. Returns: Response: generated response @@ -127,6 +256,7 @@ class Client: do_sample=do_sample, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, return_full_text=return_full_text, seed=seed, stop=stop_sequences if stop_sequences is not None else [], @@ -138,6 +268,7 @@ class Client: watermark=watermark, decoder_input_details=decoder_input_details, top_n_tokens=top_n_tokens, + grammar=grammar, ) request = Request(inputs=prompt, stream=False, parameters=parameters) @@ -159,6 +290,7 @@ class Client: do_sample: bool = False, max_new_tokens: int = 20, repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, return_full_text: bool = False, seed: Optional[int] = None, stop_sequences: Optional[List[str]] = None, @@ -169,6 +301,7 @@ class Client: typical_p: Optional[float] = None, watermark: bool = False, top_n_tokens: Optional[int] = None, + grammar: Optional[Grammar] = None, ) -> Iterator[StreamResponse]: """ Given a prompt, generate the following stream of tokens @@ -183,6 +316,10 @@ class Client: repetition_penalty (`float`): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 1.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. return_full_text (`bool`): Whether to prepend the prompt to the generated text seed (`int`): @@ -205,6 +342,9 @@ class Client: Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) top_n_tokens (`int`): Return the `n` most likely tokens at each step + grammar (`Grammar`): + Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation + of the text to match a regular expression or JSON schema. Returns: Iterator[StreamResponse]: stream of generated tokens @@ -217,6 +357,7 @@ class Client: do_sample=do_sample, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, return_full_text=return_full_text, seed=seed, stop=stop_sequences if stop_sequences is not None else [], @@ -227,6 +368,7 @@ class Client: typical_p=typical_p, watermark=watermark, top_n_tokens=top_n_tokens, + grammar=grammar, ) request = Request(inputs=prompt, stream=True, parameters=parameters) @@ -306,7 +448,120 @@ class AsyncClient: self.base_url = base_url self.headers = headers self.cookies = cookies - self.timeout = ClientTimeout(timeout * 60) + self.timeout = ClientTimeout(timeout) + + async def chat( + self, + messages: List[Message], + repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, + logit_bias: Optional[List[float]] = None, + logprobs: Optional[bool] = None, + top_logprobs: Optional[int] = None, + max_tokens: Optional[int] = None, + n: Optional[int] = None, + presence_penalty: Optional[float] = None, + stream: bool = False, + seed: Optional[int] = None, + temperature: Optional[float] = None, + top_p: Optional[float] = None, + tools: Optional[List[Tool]] = None, + tool_choice: Optional[str] = None, + ) -> Union[ChatComplete, AsyncIterator[ChatCompletionChunk]]: + """ + Given a list of messages, generate a response asynchronously + + Args: + messages (`List[Message]`): + List of messages + repetition_penalty (`float`): + The parameter for frequency penalty. 0.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 0.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. + logit_bias (`List[float]`): + Adjust the likelihood of specified tokens + logprobs (`bool`): + Include log probabilities in the response + top_logprobs (`int`): + Include the `n` most likely tokens at each step + max_tokens (`int`): + Maximum number of generated tokens + n (`int`): + Generate `n` completions + presence_penalty (`float`): + The parameter for presence penalty. 0.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + stream (`bool`): + Stream the response + seed (`int`): + Random sampling seed + temperature (`float`): + The value used to module the logits distribution. + top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation + tools (`List[Tool]`): + List of tools to use + tool_choice (`str`): + The tool to use + + """ + request = ChatRequest( + model="tgi", + messages=messages, + repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + top_logprobs=top_logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stream=stream, + seed=seed, + temperature=temperature, + top_p=top_p, + tools=tools, + tool_choice=tool_choice, + ) + if not stream: + return await self._chat_single_response(request) + else: + return self._chat_stream_response(request) + + async def _chat_single_response(self, request): + async with ClientSession( + headers=self.headers, cookies=self.cookies, timeout=self.timeout + ) as session: + async with session.post( + f"{self.base_url}/v1/chat/completions", json=request.dict() + ) as resp: + payload = await resp.json() + if resp.status != 200: + raise parse_error(resp.status, payload) + return ChatComplete(**payload) + + async def _chat_stream_response(self, request): + async with ClientSession( + headers=self.headers, cookies=self.cookies, timeout=self.timeout + ) as session: + async with session.post( + f"{self.base_url}/v1/chat/completions", json=request.dict() + ) as resp: + async for byte_payload in resp.content: + if byte_payload == b"\n": + continue + payload = byte_payload.decode("utf-8") + if payload.startswith("data:"): + json_payload = json.loads(payload.lstrip("data:").rstrip("\n")) + try: + response = ChatCompletionChunk(**json_payload) + yield response + except ValidationError: + raise parse_error(resp.status, json_payload) async def generate( self, @@ -315,6 +570,7 @@ class AsyncClient: max_new_tokens: int = 20, best_of: Optional[int] = None, repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, return_full_text: bool = False, seed: Optional[int] = None, stop_sequences: Optional[List[str]] = None, @@ -326,6 +582,7 @@ class AsyncClient: watermark: bool = False, decoder_input_details: bool = False, top_n_tokens: Optional[int] = None, + grammar: Optional[Grammar] = None, ) -> Response: """ Given a prompt, generate the following text asynchronously @@ -342,6 +599,10 @@ class AsyncClient: repetition_penalty (`float`): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 1.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. return_full_text (`bool`): Whether to prepend the prompt to the generated text seed (`int`): @@ -366,10 +627,14 @@ class AsyncClient: Return the decoder input token logprobs and ids top_n_tokens (`int`): Return the `n` most likely tokens at each step + grammar (`Grammar`): + Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation + of the text to match a regular expression or JSON schema. Returns: Response: generated response """ + # Validate parameters parameters = Parameters( best_of=best_of, @@ -378,6 +643,7 @@ class AsyncClient: do_sample=do_sample, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, return_full_text=return_full_text, seed=seed, stop=stop_sequences if stop_sequences is not None else [], @@ -388,6 +654,7 @@ class AsyncClient: typical_p=typical_p, watermark=watermark, top_n_tokens=top_n_tokens, + grammar=grammar, ) request = Request(inputs=prompt, stream=False, parameters=parameters) @@ -407,6 +674,7 @@ class AsyncClient: do_sample: bool = False, max_new_tokens: int = 20, repetition_penalty: Optional[float] = None, + frequency_penalty: Optional[float] = None, return_full_text: bool = False, seed: Optional[int] = None, stop_sequences: Optional[List[str]] = None, @@ -417,6 +685,7 @@ class AsyncClient: typical_p: Optional[float] = None, watermark: bool = False, top_n_tokens: Optional[int] = None, + grammar: Optional[Grammar] = None, ) -> AsyncIterator[StreamResponse]: """ Given a prompt, generate the following stream of tokens asynchronously @@ -431,6 +700,10 @@ class AsyncClient: repetition_penalty (`float`): The parameter for repetition penalty. 1.0 means no penalty. See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + frequency_penalty (`float`): + The parameter for frequency penalty. 1.0 means no penalty + Penalize new tokens based on their existing frequency in the text so far, + decreasing the model's likelihood to repeat the same line verbatim. return_full_text (`bool`): Whether to prepend the prompt to the generated text seed (`int`): @@ -453,6 +726,9 @@ class AsyncClient: Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) top_n_tokens (`int`): Return the `n` most likely tokens at each step + grammar (`Grammar`): + Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation + of the text to match a regular expression or JSON schema. Returns: AsyncIterator[StreamResponse]: stream of generated tokens @@ -465,6 +741,7 @@ class AsyncClient: do_sample=do_sample, max_new_tokens=max_new_tokens, repetition_penalty=repetition_penalty, + frequency_penalty=frequency_penalty, return_full_text=return_full_text, seed=seed, stop=stop_sequences if stop_sequences is not None else [], @@ -475,6 +752,7 @@ class AsyncClient: typical_p=typical_p, watermark=watermark, top_n_tokens=top_n_tokens, + grammar=grammar, ) request = Request(inputs=prompt, stream=True, parameters=parameters) diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py index aa02d8d8..deb987c5 100644 --- a/clients/python/text_generation/types.py +++ b/clients/python/text_generation/types.py @@ -1,10 +1,147 @@ from enum import Enum -from pydantic import BaseModel, validator -from typing import Optional, List +from pydantic import BaseModel, field_validator +from typing import Optional, List, Union, Any from text_generation.errors import ValidationError +# enum for grammar type +class GrammarType(str, Enum): + Json = "json" + Regex = "regex" + + +# Grammar type and value +class Grammar(BaseModel): + # Grammar type + type: GrammarType + # Grammar value + value: Union[str, dict] + + +class ToolCall(BaseModel): + # Id of the tool call + id: int + # Type of the tool call + type: str + # Function details of the tool call + function: dict + + +class Message(BaseModel): + # Role of the message sender + role: str + # Content of the message + content: Optional[str] = None + # Optional name of the message sender + name: Optional[str] = None + # Tool calls associated with the chat completion + tool_calls: Optional[Any] = None + + +class Tool(BaseModel): + # Type of the tool + type: str + # Function details of the tool + function: dict + + +class ChatCompletionComplete(BaseModel): + # Index of the chat completion + index: int + # Message associated with the chat completion + message: Message + # Log probabilities for the chat completion + logprobs: Optional[Any] + # Reason for completion + finish_reason: str + # Usage details of the chat completion + usage: Optional[Any] = None + + +class Function(BaseModel): + name: Optional[str] + arguments: str + + +class ChoiceDeltaToolCall(BaseModel): + index: int + id: str + type: str + function: Function + + +class ChoiceDelta(BaseModel): + role: str + content: Optional[str] = None + tool_calls: Optional[ChoiceDeltaToolCall] + + +class Choice(BaseModel): + index: int + delta: ChoiceDelta + logprobs: Optional[dict] = None + finish_reason: Optional[str] = None + + +class ChatCompletionChunk(BaseModel): + id: str + object: str + created: int + model: str + system_fingerprint: str + choices: List[Choice] + + +class ChatComplete(BaseModel): + # Chat completion details + id: str + object: str + created: int + model: str + system_fingerprint: str + choices: List[ChatCompletionComplete] + usage: Any + + +class ChatRequest(BaseModel): + # Model identifier + model: str + # List of messages in the conversation + messages: List[Message] + # The parameter for repetition penalty. 1.0 means no penalty. + # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + repetition_penalty: Optional[float] = None + # The parameter for frequency penalty. 1.0 means no penalty + # Penalize new tokens based on their existing frequency in the text so far, + # decreasing the model's likelihood to repeat the same line verbatim. + frequency_penalty: Optional[float] = None + # Bias values for token selection + logit_bias: Optional[List[float]] = None + # Whether to return log probabilities + logprobs: Optional[bool] = None + # Number of most likely tokens to return at each position + top_logprobs: Optional[int] = None + # Maximum number of tokens to generate + max_tokens: Optional[int] = None + # Number of chat completion choices to generate + n: Optional[int] = None + # Penalty for presence of new tokens + presence_penalty: Optional[float] = None + # Flag to indicate streaming response + stream: bool = False + # Random sampling seed + seed: Optional[int] = None + # Sampling temperature + temperature: Optional[float] = None + # Top-p value for nucleus sampling + top_p: Optional[float] = None + # List of tools to be used + tools: Optional[List[Tool]] = None + # Choice of tool to be used + tool_choice: Optional[str] = None + + class Parameters(BaseModel): # Activate logits sampling do_sample: bool = False @@ -13,6 +150,10 @@ class Parameters(BaseModel): # The parameter for repetition penalty. 1.0 means no penalty. # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. repetition_penalty: Optional[float] = None + # The parameter for frequency penalty. 1.0 means no penalty + # Penalize new tokens based on their existing frequency in the text so far, + # decreasing the model's likelihood to repeat the same line verbatim. + frequency_penalty: Optional[float] = None # Whether to prepend the prompt to the generated text return_full_text: bool = False # Stop generating tokens if a member of `stop_sequences` is generated @@ -41,74 +182,91 @@ class Parameters(BaseModel): decoder_input_details: bool = False # Return the N most likely tokens at each step top_n_tokens: Optional[int] = None + # grammar to use for generation + grammar: Optional[Grammar] = None - @validator("best_of") + @field_validator("best_of") def valid_best_of(cls, field_value, values): if field_value is not None: if field_value <= 0: raise ValidationError("`best_of` must be strictly positive") - if field_value > 1 and values["seed"] is not None: + if field_value > 1 and values.data["seed"] is not None: raise ValidationError("`seed` must not be set when `best_of` is > 1") sampling = ( - values["do_sample"] - | (values["temperature"] is not None) - | (values["top_k"] is not None) - | (values["top_p"] is not None) - | (values["typical_p"] is not None) + values.data["do_sample"] + | (values.data["temperature"] is not None) + | (values.data["top_k"] is not None) + | (values.data["top_p"] is not None) + | (values.data["typical_p"] is not None) ) if field_value > 1 and not sampling: raise ValidationError("you must use sampling when `best_of` is > 1") return field_value - @validator("repetition_penalty") + @field_validator("repetition_penalty") def valid_repetition_penalty(cls, v): if v is not None and v <= 0: raise ValidationError("`repetition_penalty` must be strictly positive") return v - @validator("seed") + @field_validator("frequency_penalty") + def valid_frequency_penalty(cls, v): + if v is not None and v <= 0: + raise ValidationError("`frequency_penalty` must be strictly positive") + return v + + @field_validator("seed") def valid_seed(cls, v): if v is not None and v < 0: raise ValidationError("`seed` must be positive") return v - @validator("temperature") + @field_validator("temperature") def valid_temp(cls, v): if v is not None and v <= 0: raise ValidationError("`temperature` must be strictly positive") return v - @validator("top_k") + @field_validator("top_k") def valid_top_k(cls, v): if v is not None and v <= 0: raise ValidationError("`top_k` must be strictly positive") return v - @validator("top_p") + @field_validator("top_p") def valid_top_p(cls, v): if v is not None and (v <= 0 or v >= 1.0): raise ValidationError("`top_p` must be > 0.0 and < 1.0") return v - @validator("truncate") + @field_validator("truncate") def valid_truncate(cls, v): if v is not None and v <= 0: raise ValidationError("`truncate` must be strictly positive") return v - @validator("typical_p") + @field_validator("typical_p") def valid_typical_p(cls, v): if v is not None and (v <= 0 or v >= 1.0): raise ValidationError("`typical_p` must be > 0.0 and < 1.0") return v - @validator("top_n_tokens") + @field_validator("top_n_tokens") def valid_top_n_tokens(cls, v): if v is not None and v <= 0: raise ValidationError("`top_n_tokens` must be strictly positive") return v + @field_validator("grammar") + def valid_grammar(cls, v): + if v is not None: + if v.type == GrammarType.Regex and not v.value: + raise ValidationError("`value` cannot be empty for `regex` grammar") + if v.type == GrammarType.Json and not v.value: + raise ValidationError("`value` cannot be empty for `json` grammar") + return v + class Request(BaseModel): # Prompt @@ -118,15 +276,15 @@ class Request(BaseModel): # Whether to stream output tokens stream: bool = False - @validator("inputs") + @field_validator("inputs") def valid_input(cls, v): if not v: raise ValidationError("`inputs` cannot be empty") return v - @validator("stream") + @field_validator("stream") def valid_best_of_stream(cls, field_value, values): - parameters = values["parameters"] + parameters = values.data["parameters"] if ( parameters is not None and parameters.best_of is not None @@ -157,7 +315,7 @@ class Token(BaseModel): # Token text text: str # Logprob - logprob: float + logprob: Optional[float] = None # Is the token a special token # Can be used to ignore tokens when concatenating special: bool diff --git a/docs/index.html b/docs/index.html index 16d143d8..f582d3ce 100644 --- a/docs/index.html +++ b/docs/index.html @@ -27,4 +27,4 @@ } - \ No newline at end of file + diff --git a/docs/openapi.json b/docs/openapi.json index 6a15ae66..34b030f2 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -10,7 +10,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "1.2.0" + "version": "2.0.0" }, "paths": { "/": { @@ -342,6 +342,219 @@ } } } + }, + "/tokenize": { + "post": { + "tags": [ + "Text Generation Inference" + ], + "summary": "Tokenize inputs", + "description": "Tokenize inputs", + "operationId": "tokenize", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/GenerateRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Tokenized ids", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TokenizeResponse" + } + } + } + }, + "404": { + "description": "No tokenizer found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "No fast tokenizer available" + } + } + } + } + } + } + }, + "/v1/chat/completions": { + "post": { + "tags": [ + "Text Generation Inference" + ], + "summary": "Generate tokens", + "description": "Generate tokens", + "operationId": "chat_completions", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Generated Text", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatCompletionChunk" + } + } + } + }, + "422": { + "description": "Input validation error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Input validation error" + } + } + } + }, + "424": { + "description": "Generation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Request failed during generation" + } + } + } + }, + "429": { + "description": "Model is overloaded", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Model is overloaded" + } + } + } + }, + "500": { + "description": "Incomplete generation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Incomplete generation" + } + } + } + } + } + } + }, + "/v1/completions": { + "post": { + "tags": [ + "Text Generation Inference" + ], + "summary": "Generate tokens", + "description": "Generate tokens", + "operationId": "completions", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CompletionRequest" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Generated Text", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ChatCompletionChunk" + } + } + } + }, + "422": { + "description": "Input validation error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Input validation error" + } + } + } + }, + "424": { + "description": "Generation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Request failed during generation" + } + } + } + }, + "429": { + "description": "Model is overloaded", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Model is overloaded" + } + } + } + }, + "500": { + "description": "Incomplete generation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ErrorResponse" + }, + "example": { + "error": "Incomplete generation" + } + } + } + } + } + } } }, "components": { @@ -399,6 +612,353 @@ } } }, + "ChatCompletion": { + "type": "object", + "required": [ + "id", + "object", + "created", + "model", + "system_fingerprint", + "choices", + "usage" + ], + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ChatCompletionComplete" + } + }, + "created": { + "type": "integer", + "format": "int64", + "example": "1706270835", + "minimum": 0 + }, + "id": { + "type": "string" + }, + "model": { + "type": "string", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "object": { + "type": "string" + }, + "system_fingerprint": { + "type": "string" + }, + "usage": { + "$ref": "#/components/schemas/Usage" + } + } + }, + "ChatCompletionChoice": { + "type": "object", + "required": [ + "index", + "delta" + ], + "properties": { + "delta": { + "$ref": "#/components/schemas/ChatCompletionDelta" + }, + "finish_reason": { + "type": "string", + "nullable": true + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "logprobs": { + "allOf": [ + { + "$ref": "#/components/schemas/ChatCompletionLogprobs" + } + ], + "nullable": true + } + } + }, + "ChatCompletionChunk": { + "type": "object", + "required": [ + "id", + "object", + "created", + "model", + "system_fingerprint", + "choices" + ], + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ChatCompletionChoice" + } + }, + "created": { + "type": "integer", + "format": "int64", + "example": "1706270978", + "minimum": 0 + }, + "id": { + "type": "string" + }, + "model": { + "type": "string", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "object": { + "type": "string" + }, + "system_fingerprint": { + "type": "string" + } + } + }, + "ChatCompletionComplete": { + "type": "object", + "required": [ + "index", + "message", + "finish_reason" + ], + "properties": { + "finish_reason": { + "type": "string" + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "logprobs": { + "allOf": [ + { + "$ref": "#/components/schemas/ChatCompletionLogprobs" + } + ], + "nullable": true + }, + "message": { + "$ref": "#/components/schemas/Message" + } + } + }, + "ChatCompletionDelta": { + "type": "object", + "required": [ + "role" + ], + "properties": { + "content": { + "type": "string", + "example": "What is Deep Learning?", + "nullable": true + }, + "role": { + "type": "string", + "example": "user" + }, + "tool_calls": { + "allOf": [ + { + "$ref": "#/components/schemas/DeltaToolCall" + } + ], + "nullable": true + } + } + }, + "ChatCompletionLogprob": { + "type": "object", + "required": [ + "token", + "logprob", + "top_logprobs" + ], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + }, + "top_logprobs": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ChatCompletionTopLogprob" + } + } + } + }, + "ChatCompletionLogprobs": { + "type": "object", + "required": [ + "content" + ], + "properties": { + "content": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ChatCompletionLogprob" + } + } + } + }, + "ChatCompletionTopLogprob": { + "type": "object", + "required": [ + "token", + "logprob" + ], + "properties": { + "logprob": { + "type": "number", + "format": "float" + }, + "token": { + "type": "string" + } + } + }, + "ChatRequest": { + "type": "object", + "required": [ + "model", + "messages" + ], + "properties": { + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.", + "example": "1.0", + "nullable": true + }, + "logit_bias": { + "type": "array", + "items": { + "type": "number", + "format": "float" + }, + "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.", + "nullable": true + }, + "logprobs": { + "type": "boolean", + "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.", + "example": "false", + "nullable": true + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens that can be generated in the chat completion.", + "example": "32", + "nullable": true, + "minimum": 0 + }, + "messages": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Message" + }, + "description": "A list of messages comprising the conversation so far.", + "example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]" + }, + "model": { + "type": "string", + "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "n": { + "type": "integer", + "format": "int32", + "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.", + "example": "2", + "nullable": true, + "minimum": 0 + }, + "presence_penalty": { + "type": "number", + "format": "float", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics", + "example": 0.1, + "nullable": true + }, + "seed": { + "type": "integer", + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 + }, + "stop": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Up to 4 sequences where the API will stop generating further tokens.", + "example": "null", + "nullable": true + }, + "stream": { + "type": "boolean" + }, + "temperature": { + "type": "number", + "format": "float", + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.", + "example": 1.0, + "nullable": true + }, + "tool_choice": { + "allOf": [ + { + "$ref": "#/components/schemas/ToolType" + } + ], + "nullable": true + }, + "tool_prompt": { + "type": "string", + "description": "A prompt to be appended before the tools", + "example": "\"Based on the conversation, please choose the most appropriate tool to use: \"", + "nullable": true + }, + "tools": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Tool" + }, + "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.", + "example": "null", + "nullable": true + }, + "top_logprobs": { + "type": "integer", + "format": "int32", + "description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.", + "example": "5", + "nullable": true, + "minimum": 0 + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "example": 0.95, + "nullable": true + } + } + }, "CompatGenerateRequest": { "type": "object", "required": [ @@ -418,6 +978,164 @@ } } }, + "CompletionComplete": { + "type": "object", + "required": [ + "index", + "text", + "finish_reason" + ], + "properties": { + "finish_reason": { + "type": "string" + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "logprobs": { + "type": "array", + "items": { + "type": "number", + "format": "float" + }, + "nullable": true + }, + "text": { + "type": "string" + } + } + }, + "CompletionCompleteChunk": { + "type": "object", + "required": [ + "id", + "object", + "created", + "choices", + "model", + "system_fingerprint" + ], + "properties": { + "choices": { + "type": "array", + "items": { + "$ref": "#/components/schemas/CompletionComplete" + } + }, + "created": { + "type": "integer", + "format": "int64", + "minimum": 0 + }, + "id": { + "type": "string" + }, + "model": { + "type": "string" + }, + "object": { + "type": "string" + }, + "system_fingerprint": { + "type": "string" + } + } + }, + "CompletionRequest": { + "type": "object", + "required": [ + "model", + "prompt" + ], + "properties": { + "frequency_penalty": { + "type": "number", + "format": "float", + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.", + "example": "1.0", + "nullable": true + }, + "max_tokens": { + "type": "integer", + "format": "int32", + "description": "The maximum number of tokens that can be generated in the chat completion.", + "default": "32", + "nullable": true, + "minimum": 0 + }, + "model": { + "type": "string", + "description": "UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.", + "example": "mistralai/Mistral-7B-Instruct-v0.2" + }, + "prompt": { + "type": "string", + "description": "The prompt to generate completions for.", + "example": "What is Deep Learning?" + }, + "repetition_penalty": { + "type": "number", + "format": "float", + "nullable": true + }, + "seed": { + "type": "integer", + "format": "int64", + "example": 42, + "nullable": true, + "minimum": 0 + }, + "stream": { + "type": "boolean" + }, + "suffix": { + "type": "string", + "description": "The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.\nplease see the completion_template field in the model's tokenizer_config.json file for completion template.", + "nullable": true + }, + "temperature": { + "type": "number", + "format": "float", + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.", + "example": 1.0, + "nullable": true + }, + "top_p": { + "type": "number", + "format": "float", + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.", + "example": 0.95, + "nullable": true + } + } + }, + "DeltaToolCall": { + "type": "object", + "required": [ + "index", + "id", + "type", + "function" + ], + "properties": { + "function": { + "$ref": "#/components/schemas/Function" + }, + "id": { + "type": "string" + }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "type": { + "type": "string" + } + } + }, "Details": { "type": "object", "required": [ @@ -494,7 +1212,40 @@ "length", "eos_token", "stop_sequence" - ] + ], + "example": "Length" + }, + "Function": { + "type": "object", + "required": [ + "arguments" + ], + "properties": { + "arguments": { + "type": "string" + }, + "name": { + "type": "string", + "nullable": true + } + } + }, + "FunctionDefinition": { + "type": "object", + "required": [ + "name", + "parameters" + ], + "properties": { + "description": { + "type": "string", + "nullable": true + }, + "name": { + "type": "string" + }, + "parameters": {} + } }, "GenerateParameters": { "type": "object", @@ -520,10 +1271,26 @@ "default": "false", "example": true }, + "frequency_penalty": { + "type": "number", + "format": "float", + "default": "null", + "example": 0.1, + "nullable": true, + "exclusiveMinimum": -2 + }, + "grammar": { + "allOf": [ + { + "$ref": "#/components/schemas/GrammarType" + } + ], + "nullable": true + }, "max_new_tokens": { "type": "integer", "format": "int32", - "default": "null", + "default": "100", "example": "20", "nullable": true, "minimum": 0 @@ -653,6 +1420,49 @@ } } }, + "GrammarType": { + "oneOf": [ + { + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "json" + ] + }, + "value": { + "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions." + } + } + }, + { + "type": "object", + "required": [ + "type", + "value" + ], + "properties": { + "type": { + "type": "string", + "enum": [ + "regex" + ] + }, + "value": { + "type": "string" + } + } + } + ], + "discriminator": { + "propertyName": "type" + } + }, "Info": { "type": "object", "required": [ @@ -676,6 +1486,12 @@ "example": "null", "nullable": true }, + "max_batch_size": { + "type": "integer", + "example": "null", + "nullable": true, + "minimum": 0 + }, "max_batch_total_tokens": { "type": "integer", "format": "int32", @@ -758,6 +1574,35 @@ } } }, + "Message": { + "type": "object", + "required": [ + "role" + ], + "properties": { + "content": { + "type": "string", + "example": "My name is David and I", + "nullable": true + }, + "name": { + "type": "string", + "example": "\"David\"", + "nullable": true + }, + "role": { + "type": "string", + "example": "user" + }, + "tool_calls": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ToolCall" + }, + "nullable": true + } + } + }, "PrefillToken": { "type": "object", "required": [ @@ -784,6 +1629,37 @@ } } }, + "SimpleToken": { + "type": "object", + "required": [ + "id", + "text", + "start", + "stop" + ], + "properties": { + "id": { + "type": "integer", + "format": "int32", + "example": 0, + "minimum": 0 + }, + "start": { + "type": "integer", + "example": 0, + "minimum": 0 + }, + "stop": { + "type": "integer", + "example": 2, + "minimum": 0 + }, + "text": { + "type": "string", + "example": "test" + } + } + }, "StreamDetails": { "type": "object", "required": [ @@ -812,6 +1688,7 @@ "StreamResponse": { "type": "object", "required": [ + "index", "token" ], "properties": { @@ -830,6 +1707,11 @@ "example": "test", "nullable": true }, + "index": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, "token": { "$ref": "#/components/schemas/Token" }, @@ -871,6 +1753,95 @@ "example": "test" } } + }, + "TokenizeResponse": { + "type": "array", + "items": { + "$ref": "#/components/schemas/SimpleToken" + } + }, + "Tool": { + "type": "object", + "required": [ + "type", + "function" + ], + "properties": { + "function": { + "$ref": "#/components/schemas/FunctionDefinition" + }, + "type": { + "type": "string", + "example": "function" + } + } + }, + "ToolCall": { + "type": "object", + "required": [ + "id", + "type", + "function" + ], + "properties": { + "function": { + "$ref": "#/components/schemas/FunctionDefinition" + }, + "id": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "type": { + "type": "string" + } + } + }, + "ToolType": { + "oneOf": [ + { + "type": "object", + "required": [ + "FunctionName" + ], + "properties": { + "FunctionName": { + "type": "string" + } + } + }, + { + "type": "string", + "enum": [ + "OneOf" + ] + } + ] + }, + "Usage": { + "type": "object", + "required": [ + "prompt_tokens", + "completion_tokens", + "total_tokens" + ], + "properties": { + "completion_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "prompt_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + }, + "total_tokens": { + "type": "integer", + "format": "int32", + "minimum": 0 + } + } } } }, diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 6fa50a6a..1598c248 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -7,6 +7,8 @@ title: Installation - local: supported_models title: Supported Models and Hardware + - local: messages_api + title: Messages API title: Getting started - sections: - local: basic_tutorials/consuming_tgi @@ -21,6 +23,8 @@ title: All TGI CLI options - local: basic_tutorials/non_core_models title: Non-core Model Serving + - local: basic_tutorials/safety + title: Safety title: Tutorials - sections: - local: conceptual/streaming @@ -35,4 +39,8 @@ title: Safetensors - local: conceptual/flash_attention title: Flash Attention + - local: conceptual/speculation + title: Speculation (Medusa, ngram) + - local: conceptual/guidance + title: Guidance, JSON, tools (using outlines) title: Conceptual Guides diff --git a/docs/source/basic_tutorials/consuming_tgi.md b/docs/source/basic_tutorials/consuming_tgi.md index 540f4b13..4829ec7c 100644 --- a/docs/source/basic_tutorials/consuming_tgi.md +++ b/docs/source/basic_tutorials/consuming_tgi.md @@ -23,7 +23,7 @@ You can simply install `huggingface-hub` package with pip. pip install huggingface-hub ``` -Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python. +Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python. ```python from huggingface_hub import InferenceClient @@ -83,8 +83,8 @@ Gradio is a Python library that helps you build web applications for your machin pip install huggingface-hub gradio ``` -Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client). - +Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client). + ```python import gradio as gr from huggingface_hub import InferenceClient @@ -110,30 +110,30 @@ gr.ChatInterface( ).queue().launch() ``` -The UI looks like this 👇 +The UI looks like this 👇
- -
-You can try the demo directly here 👇 +You can try the demo directly here 👇
-