diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 8304c8d1..220b2fa3 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -8,6 +8,15 @@ on: tags: - 'v*' pull_request: + paths: + - ".github/workflows/build.yaml" + - "server/**" + - "proto/**" + - "router/**" + - "launcher/**" + - "Cargo.lock" + - "rust-toolchain.toml" + - "Dockerfile" branches: - 'main' @@ -15,6 +24,10 @@ jobs: build-and-push-image: runs-on: ubuntu-latest steps: + - name: Initialize Docker Buildx + uses: docker/setup-buildx-action@v2.0.0 + with: + install: true - name: Tailscale uses: tailscale/github-action@v1 with: @@ -65,5 +78,5 @@ jobs: platforms: 'linux/amd64' tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=ghcr.io/huggingface/text-generation-inference:latest - cache-to: type=inline \ No newline at end of file + cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max + cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max \ No newline at end of file diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 8b659fe2..1a45ad04 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -3,14 +3,23 @@ name: Server Tests on: pull_request: paths: + - ".github/workflows/tests.yaml" - "server/**" - "proto/**" - "router/**" - "launcher/**" + - "Cargo.lock" + - "rust-toolchain.toml" jobs: run_tests: runs-on: ubuntu-20.04 + + env: + SCCACHE_GHA_ENABLED: "on" + RUSTC_WRAPPER: /usr/local/bin/sccache + SCCACHE: 0.3.3 + steps: - uses: actions/checkout@v2 - name: Set up Python @@ -25,19 +34,38 @@ jobs: components: rustfmt, clippy - name: Install Protoc uses: arduino/setup-protoc@v1 - - name: Loading cache. 
- uses: actions/cache@v2 - id: model_cache + - name: Install sccache + run: | + curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache + chmod +x /usr/local/bin/sccache + - name: configure sccache + uses: actions/github-script@v6 with: - path: ~/.cache/huggingface/ - key: models + script: | + core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || ''); + core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || ''); + core.exportVariable('SCCACHE_GHA_CACHE_TO', 'sccache-${{runner.os}}-${{github.ref_name}}'); + core.exportVariable('SCCACHE_GHA_CACHE_FROM', 'sccache-${{runner.os}}-main,sccache-${{runner.os}}-'); + - name: cargo registry cache + uses: actions/cache@v3 + with: + key: cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}-${{ github.sha }} + restore-keys: | + cargo-${{ runner.os }}-${{ hashFiles('**/Cargo.toml') }}- + cargo-${{ runner.os }}- + path: | + ~/.cargo/registry + ~/.cargo/git - name: Install run: | make install - name: Run server tests run: | pip install pytest - pytest -sv server/tests + HF_HUB_ENABLE_HF_TRANSFER=1 pytest -sv server/tests - name: Run Rust tests run: | cargo test + - name: sccache stats + run: | + /usr/local/bin/sccache --show-stats diff --git a/Cargo.lock b/Cargo.lock index e8a28bf9..9db5f213 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,6 +8,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "0.7.20" @@ -34,19 +45,20 @@ checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" [[package]] name = "async-stream" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" +checksum = "ad445822218ce64be7a341abfb0b1ea43b5c23aa83902542a4542e78309d8e5e" dependencies = [ "async-stream-impl", "futures-core", + "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" +checksum = "e4655ae1a7b0cdf149156f780c5bf3f1352bc53cbd9e0a361a7ef7b22947e965" dependencies = [ "proc-macro2", "quote", @@ -83,9 +95,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.6.4" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" +checksum = "6137c6234afb339e75e764c866e3594900f0211e1315d33779f269bbe2ec6967" dependencies = [ "async-trait", "axum-core", @@ -109,7 +121,7 @@ dependencies = [ "sync_wrapper", "tokio", "tower", - "tower-http", + "tower-http 0.4.0", "tower-layer", "tower-service", ] @@ -142,7 +154,7 @@ dependencies = [ "http", "opentelemetry", "tower", - "tower-http", + "tower-http 0.3.5", "tracing", "tracing-opentelemetry", ] @@ -265,9 +277,9 @@ 
dependencies = [ [[package]] name = "clap" -version = "4.1.4" +version = "4.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f13b9c79b5d1dd500d20ef541215a6423c75829ef43117e1b4d17fd8af0b5d76" +checksum = "c3d7ae14b20b94cb02149ed21a86c423859cbe18dc7ed69845cace50e52b40a5" dependencies = [ "bitflags", "clap_derive", @@ -280,9 +292,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.1.0" +version = "4.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684a277d672e91966334af371f1a7b5833f9aa00b07c84e92fbce95e00208ce8" +checksum = "44bec8e5c9d09e439c4335b1af0abaab56dcf3b94999a936e1bb47b9134288f0" dependencies = [ "heck", "proc-macro-error", @@ -293,9 +305,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "783fe232adfca04f90f56201b26d79682d4cd2625e0bc7290b95123afe558ade" +checksum = "350b9cf31731f9957399229e9b2adc51eeabdfbe9d71d9a0552275fd12710d09" dependencies = [ "os_str_bytes", ] @@ -349,9 +361,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.6" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521" +checksum = "cf2b3e8478797446514c91ef04bafcb59faba183e621ad488df88983cc14128c" dependencies = [ "cfg-if", "crossbeam-utils", @@ -359,9 +371,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -370,9 +382,9 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.13" +version = "0.9.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9af1f4c2ef74bb8aa1f7e19706bc72d03598c8a570bb5de72243c7a9d9d5a" +checksum = "46bd5f3f85273295a9d14aedfb86f6aadbff6d8f5295c4a9edb08e819dcf5695" dependencies = [ "autocfg", "cfg-if", @@ -383,9 +395,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.14" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f" +checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" dependencies = [ "cfg-if", ] @@ -575,9 +587,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" dependencies = [ "instant", ] @@ -774,7 +786,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "grpc-metadata" -version = "0.1.0" +version = "0.4.0" dependencies = [ "opentelemetry", "tonic", @@ -784,9 +796,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.15" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" +checksum = "5be7b54589b581f624f566bf5d8eb2bab1db736c51528720b6bd36b96b55924d" dependencies = [ "bytes", "fnv", @@ -806,6 +818,9 @@ name = 
"hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -839,9 +854,9 @@ checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] name = "http" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ "bytes", "fnv", @@ -1004,9 +1019,9 @@ checksum = "30e22bd8629359895450b59ea7a776c850561b96a3b1d31321c1949d9e6c9146" [[package]] name = "is-terminal" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0a45d56fe973d6db23972bf5bc46f988a4a2385deac9cc29572f09daef" +checksum = "21b6b32576413a8e69b90e952e4a026476040d81017b80445deda5f2d3921857" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", @@ -1093,6 +1108,15 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "macro_rules_attribute" version = "0.1.3" @@ -1132,13 +1156,71 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "memoffset" -version = "0.7.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4" +checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1" dependencies = [ "autocfg", ] +[[package]] +name = "metrics" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b9b8653cec6897f73b519a43fba5ee3d50f62fe9af80b428accdcc093b4a849" +dependencies = [ + "ahash", + "metrics-macros", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8603921e1f54ef386189335f288441af761e0fc61bcb552168d9cedfe63ebc70" +dependencies = [ + "hyper", + "indexmap", + "ipnet", + "metrics", + "metrics-util", + "parking_lot", + "portable-atomic", + "quanta", + "thiserror", + "tokio", + "tracing", +] + +[[package]] +name = "metrics-macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "731f8ecebd9f3a4aa847dfe75455e4757a45da40a7793d2f0b1f9b6ed18b23f3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "metrics-util" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d24dc2dbae22bff6f1f9326ffce828c9f07ef9cc1e8002e5279f845432a30a" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown", + "metrics", + "num_cpus", + "parking_lot", + "portable-atomic", + "quanta", + "sketches-ddsketch", +] + [[package]] name = "mime" version = "0.3.16" @@ -1172,14 +1254,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" +checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = 
[ "libc", "log", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -1268,9 +1350,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "once_cell" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "onig" @@ -1514,6 +1596,12 @@ version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +[[package]] +name = "portable-atomic" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26f6a7b87c2e435a3241addceeeff740ff8b7e76b74c13bf9acb17fa454ea00b" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -1565,9 +1653,9 @@ dependencies = [ [[package]] name = "prost" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21dc42e00223fc37204bd4aa177e69420c604ca4a183209a8f9de30c6d934698" +checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" dependencies = [ "bytes", "prost-derive", @@ -1575,9 +1663,9 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f8ad728fb08fe212df3c05169e940fbb6d9d16a877ddde14644a983ba2012e" +checksum = "2c828f93f5ca4826f97fedcbd3f9a536c16b12cff3dbbb4a007f932bbad95b12" dependencies = [ "bytes", "heck", @@ -1597,9 +1685,9 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bda8c0881ea9f722eb9629376db3d0b903b462477c1aafcb0566610ac28ac5d" +checksum = "4ea9b0f8cbe5e15a8a042d030bd96668db28ecb567ec37d691971ff5731d2b1b" dependencies = [ "anyhow", "itertools 0.10.5", @@ -1610,14 +1698,29 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.11.6" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e0526209433e96d83d750dd81a99118edbc55739e7e61a46764fd2ad537788" +checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" dependencies = [ - "bytes", "prost", ] +[[package]] +name = "quanta" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e31331286705f455e56cca62e0e717158474ff02b7936c1fa596d983f4ae27" +dependencies = [ + "crossbeam-utils", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.2+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quote" version = "1.0.23" @@ -1657,6 +1760,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags", +] + [[package]] name = "rayon" version = "1.6.1" @@ -1736,15 +1848,6 @@ version = "0.6.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "reqwest" version = "0.11.14" @@ -1973,18 +2076,24 @@ dependencies = [ [[package]] name = "signal-hook-registry" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" dependencies = [ "libc", ] [[package]] -name = "slab" -version = "0.4.7" +name = "sketches-ddsketch" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "ceb945e54128e09c43d8e4f1277851bd5044c6fc540bbaa2ad888f60b3da9ae7" + +[[package]] +name = "slab" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" dependencies = [ "autocfg", ] @@ -1997,9 +2106,9 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "socket2" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +checksum = "95a21dcece9b5991cfd1ece74654c8e3d0d5aab499d359b0395e38229c0bb5a3" dependencies = [ "libc", "winapi", @@ -2053,9 +2162,9 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.107" +version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", "quote", @@ -2081,16 +2190,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.3.0" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95" dependencies = [ "cfg-if", "fastrand", - "libc", "redox_syscall", - "remove_dir_all", - "winapi", + "rustix", + "windows-sys 0.42.0", ] [[package]] @@ -2104,7 +2212,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "0.2.1" +version = "0.4.0" dependencies = [ "futures", "grpc-metadata", @@ -2121,9 +2229,9 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "0.2.1" +version = "0.4.0" dependencies = [ - "clap 4.1.4", + "clap 4.1.8", "ctrlc", "float_eq", "reqwest", @@ -2136,18 +2244,21 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "0.2.1" +version = "0.4.0" dependencies = [ "async-stream", "axum", "axum-tracing-opentelemetry", - "clap 4.1.4", + "clap 4.1.8", "futures", + "metrics", + "metrics-exporter-prometheus", "nohash-hasher", "opentelemetry", "opentelemetry-otlp", "parking_lot", "rand", + "reqwest", "serde", "serde_json", "text-generation-client", @@ -2155,6 +2266,7 @@ dependencies = [ "tokenizers", "tokio", "tokio-stream", + "tower-http 0.3.5", "tracing", "tracing-opentelemetry", "tracing-subscriber", @@ -2193,9 +2305,9 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f297120ff9d4efe680df143d5631bba9c75fa371992b7fcb33eb3453cb0a07" +checksum 
= "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ "cfg-if", "once_cell", @@ -2203,12 +2315,11 @@ dependencies = [ [[package]] name = "time" -version = "0.1.45" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "wasi 0.10.0+wasi-snapshot-preview1", "winapi", ] @@ -2264,9 +2375,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.25.0" +version = "1.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e00990ebabbe4c14c08aca901caed183ecd5c09562a12c824bb53d3c3fd3af" +checksum = "03201d01c3c27a29c8a5cee5b55a93ddae1ccf6f08f65365c2c918f8c1b76f64" dependencies = [ "autocfg", "bytes", @@ -2279,7 +2390,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2315,9 +2426,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" +checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" dependencies = [ "futures-core", "pin-project-lite", @@ -2326,9 +2437,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.6" +version = "0.7.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6a3b08b64e6dfad376fa2432c7b1f01522e37a623c3050bc95db2d3ff21583" +checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" dependencies = [ "bytes", "futures-core", @@ -2417,12 +2528,30 @@ dependencies = [ "http-body", "http-range-header", "pin-project-lite", - "tower", "tower-layer", "tower-service", "tracing", ] +[[package]] +name = "tower-http" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d1d42a9b3f3ec46ba828e8d376aec14592ea199f70a06a548587ecd1c4ab658" +dependencies = [ + "bitflags", + "bytes", + "futures-core", + "futures-util", + "http", + "http-body", + "http-range-header", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + [[package]] name = "tower-layer" version = "0.3.2" @@ -2627,9 +2756,9 @@ dependencies = [ [[package]] name = "utoipa" -version = "3.0.1" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3920fa753064b1be7842bea26175ffa0dfc4a8f30bcb52b8ff03fddf8889914c" +checksum = "a15f6da6a2b471134ca44b7d18e8a76d73035cf8b3ed24c4dd5ca6a63aa439c5" dependencies = [ "indexmap", "serde", @@ -2639,9 +2768,9 @@ dependencies = [ [[package]] name = "utoipa-gen" -version = "3.0.1" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "720298fac6efca20df9e457e67a1eab41a20d1c3101380b5c4dca1ca60ae0062" +checksum = "6f2e33027986a4707b3f5c37ed01b33d0e5a53da30204b52ff18f80600f1d0ec" dependencies = [ "proc-macro-error", "proc-macro2", @@ -2712,9 +2841,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasi" diff 
--git a/Dockerfile b/Dockerfile index 907379dc..5fbf8985 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,15 @@ -FROM rust:1.67 as router-builder +FROM lukemathwalker/cargo-chef:latest-rust-1.67 AS chef +WORKDIR /usr/src + +FROM chef as planner +COPY Cargo.toml Cargo.toml +COPY rust-toolchain.toml rust-toolchain.toml +COPY proto proto +COPY router router +COPY launcher launcher +RUN cargo chef prepare --recipe-path recipe.json + +FROM chef AS builder RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \ @@ -6,26 +17,15 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \ unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \ rm -f $PROTOC_ZIP -WORKDIR /usr/src +COPY --from=planner /usr/src/recipe.json recipe.json +RUN cargo chef cook --release --recipe-path recipe.json +COPY Cargo.toml Cargo.toml COPY rust-toolchain.toml rust-toolchain.toml COPY proto proto COPY router router - -WORKDIR /usr/src/router - -RUN cargo install --path . - -FROM rust:1.67 as launcher-builder - -WORKDIR /usr/src - -COPY rust-toolchain.toml rust-toolchain.toml COPY launcher launcher - -WORKDIR /usr/src/launcher - -RUN cargo install --path . +RUN cargo build --release FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 @@ -33,6 +33,7 @@ ENV LANG=C.UTF-8 \ LC_ALL=C.UTF-8 \ DEBIAN_FRONTEND=noninteractive \ HUGGINGFACE_HUB_CACHE=/data \ + HF_HUB_ENABLE_HF_TRANSFER=1 \ MODEL_ID=bigscience/bloom-560m \ QUANTIZE=false \ NUM_SHARD=1 \ @@ -68,9 +69,9 @@ RUN cd server && \ /opt/miniconda/envs/text-generation/bin/pip install ".[bnb]" --no-cache-dir # Install router -COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/local/bin/text-generation-router +COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router # Install launcher -COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher +COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher ENTRYPOINT ["text-generation-launcher"] CMD ["--json-output"] \ No newline at end of file diff --git a/Makefile b/Makefile index 08217730..3defd886 100644 --- a/Makefile +++ b/Makefile @@ -13,25 +13,25 @@ server-dev: cd server && make run-dev router-dev: - cd router && cargo run + cd router && cargo run -- --port 8080 integration-tests: install-router install-launcher cargo test python-tests: - cd server && pytest tests + cd server && HF_HUB_ENABLE_HF_TRANSFER=1 pytest tests run-bloom-560m: - text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 + text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --port 8080 run-bloom-560m-quantize: - text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize + text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize --port 8080 download-bloom: - text-generation-server download-weights bigscience/bloom + HF_HUB_ENABLE_HF_TRANSFER=1 text-generation-server download-weights bigscience/bloom run-bloom: - text-generation-launcher --model-id bigscience/bloom --num-shard 8 + text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080 run-bloom-quantize: - text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize \ No newline at end of file + text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080 \ No newline at end of file diff --git a/README.md 
b/README.md index 040d088c..ee724487 100644 --- a/README.md +++ b/README.md @@ -39,27 +39,30 @@ to power LLMs api-inference widgets. ## Features +- Serve the most popular Large Language Models with a simple launcher +- Tensor Parallelism for faster inference on multiple GPUs - Token streaming using Server-Sent Events (SSE) - [Dynamic batching of incoming requests](https://github.com/huggingface/text-generation-inference/blob/main/router/src/batcher.rs#L88) for increased total throughput - Quantization with [bitsandbytes](https://github.com/TimDettmers/bitsandbytes) - [Safetensors](https://github.com/huggingface/safetensors) weight loading -- 45ms per token generation for BLOOM with 8xA100 80GB +- Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) - Logits warpers (temperature scaling, topk, repetition penalty ...) - Stop sequences - Log probabilities -- Distributed tracing with Open Telemetry +- Production ready (distributed tracing with Open Telemetry, Prometheus metrics) -## Officially supported models +## Officially supported architectures - [BLOOM](https://huggingface.co/bigscience/bloom) - [BLOOMZ](https://huggingface.co/bigscience/bloomz) - [MT0-XXL](https://huggingface.co/bigscience/mt0-xxl) -- ~~[Galactica](https://huggingface.co/facebook/galactica-120b)~~ (deactivated) +- [Galactica](https://huggingface.co/facebook/galactica-120b) - [SantaCoder](https://huggingface.co/bigcode/santacoder) - [GPT-Neox 20B](https://huggingface.co/EleutherAI/gpt-neox-20b) - [FLAN-T5-XXL](https://huggingface.co/google/flan-t5-xxl) +- [FLAN-UL2](https://huggingface.co/google/flan-ul2) -Other models are supported on a best effort basis using: +Other architectures are supported on a best effort basis using: `AutoModelForCausalLM.from_pretrained(<model>, device_map="auto")` @@ -80,24 +83,42 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --num-shard $num_shard ``` +**Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher. You can then query the model using either the `/generate` or `/generate_stream` routes: ```shell curl 127.0.0.1:8080/generate \ -X POST \ - -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \ -H 'Content-Type: application/json' ``` ```shell curl 127.0.0.1:8080/generate_stream \ -X POST \ - -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \ -H 'Content-Type: application/json' ``` -**Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
+or from Python: + +```shell +pip install text-generation +``` + +```python +from text_generation import Client + +client = Client("http://127.0.0.1:8080") +print(client.generate("What is Deep Learning?", max_new_tokens=17).generated_text) + +text = "" +for response in client.generate_stream("What is Deep Learning?", max_new_tokens=17): + if not response.token.special: + text += response.token.text +print(text) +``` ### API documentation @@ -191,7 +212,7 @@ Be aware that the official Docker image has them enabled by default. ### Download -First you need to download the weights: +It is advised to download the weights ahead of time with the following command: ```shell make download-bloom diff --git a/clients/python/.gitignore b/clients/python/.gitignore new file mode 100644 index 00000000..5758ba92 --- /dev/null +++ b/clients/python/.gitignore @@ -0,0 +1,158 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +text_generation/__pycache__/ +text_generation/pb/__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +transformers +safetensors \ No newline at end of file diff --git a/clients/python/Makefile b/clients/python/Makefile new file mode 100644 index 00000000..8b4334bd --- /dev/null +++ b/clients/python/Makefile @@ -0,0 +1,6 @@ +unit-tests: + python -m pytest --cov=text_generation tests + +install: + pip install pip --upgrade + pip install -e . \ No newline at end of file diff --git a/clients/python/README.md b/clients/python/README.md new file mode 100644 index 00000000..f509e65c --- /dev/null +++ b/clients/python/README.md @@ -0,0 +1,196 @@ +# Text Generation + +The Hugging Face Text Generation Python library provides a convenient way of interfacing with a +`text-generation-inference` instance running on +[Hugging Face Inference Endpoints](https://huggingface.co/inference-endpoints) or on the Hugging Face Hub. + +## Get Started + +### Install + +```shell +pip install text-generation +``` + +### Inference API Usage + +```python +from text_generation import InferenceAPIClient + +client = InferenceAPIClient("bigscience/bloomz") +text = client.generate("Why is the sky blue?").generated_text +print(text) +# ' Rayleigh scattering' + +# Token Streaming +text = "" +for response in client.generate_stream("Why is the sky blue?"): + if not response.token.special: + text += response.token.text + +print(text) +# ' Rayleigh scattering' +``` + +or with the asynchronous client: + +```python +from text_generation import InferenceAPIAsyncClient + +client = InferenceAPIAsyncClient("bigscience/bloomz") +response = await client.generate("Why is the sky blue?") +print(response.generated_text) +# ' Rayleigh scattering' + +# Token Streaming +text = "" +async for response in client.generate_stream("Why is the sky blue?"): + if not response.token.special: + text += response.token.text + +print(text) +# ' Rayleigh scattering' +``` + +### Hugging Face Inference Endpoint usage + +```python +from text_generation import Client + +endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud" + +client = Client(endpoint_url) +text = client.generate("Why is the sky blue?").generated_text +print(text) +# ' Rayleigh scattering' + +# Token Streaming +text = "" +for response in client.generate_stream("Why is the sky blue?"): + if not response.token.special: + text += response.token.text + +print(text) +# ' Rayleigh scattering' +``` + +or with the asynchronous client: + +```python +from text_generation import AsyncClient + +endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud" + +client = AsyncClient(endpoint_url) +response = await client.generate("Why is the sky blue?") +print(response.generated_text) +# ' Rayleigh scattering' + +# Token Streaming +text = "" +async for response in client.generate_stream("Why is the sky blue?"): + if not response.token.special: + text += response.token.text + +print(text) +# ' Rayleigh scattering' +``` + +### Types + +```python +# Prompt tokens +class PrefillToken: + # Token ID from the model tokenizer + id: int + # Token text + text: str + # 
Logprob + # Optional since the logprob of the first token cannot be computed + logprob: Optional[float] + + +# Generated tokens +class Token: + # Token ID from the model tokenizer + id: int + # Token text + text: str + # Logprob + logprob: float + # Is the token a special token + # Can be used to ignore tokens when concatenating + special: bool + + +# Generation finish reason +class FinishReason(Enum): + # number of generated tokens == `max_new_tokens` + Length = "length" + # the model generated its end of sequence token + EndOfSequenceToken = "eos_token" + # the model generated a text included in `stop_sequences` + StopSequence = "stop_sequence" + + +# Additional sequences when using the `best_of` parameter +class BestOfSequence: + # Generated text + generated_text: str + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + # Prompt tokens + prefill: List[PrefillToken] + # Generated tokens + tokens: List[Token] + + +# `generate` details +class Details: + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + # Prompt tokens + prefill: List[PrefillToken] + # Generated tokens + tokens: List[Token] + # Additional sequences when using the `best_of` parameter + best_of_sequences: Optional[List[BestOfSequence]] + + +# `generate` return value +class Response: + # Generated text + generated_text: str + # Generation details + details: Details + + +# `generate_stream` details +class StreamDetails: + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + + +# `generate_stream` return value +class StreamResponse: + # Generated token + token: Token + # Complete generated text + # Only available when the generation is finished + generated_text: Optional[str] + # Generation details + # Only available when the generation is finished + details: Optional[StreamDetails] +``` \ No newline at end of file diff --git a/clients/python/poetry.lock b/clients/python/poetry.lock new file mode 100644 index 00000000..1503c40e --- /dev/null +++ b/clients/python/poetry.lock @@ -0,0 +1,1038 @@ +[[package]] +name = "aiohttp" +version = "3.8.4" +description = "Async http client/server framework (asyncio)" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +asynctest = {version = "0.13.0", markers = "python_version < \"3.8\""} +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiosignal" +version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +typing-extensions = {version = ">=3.6.5", markers = "python_version < \"3.8\""} + +[[package]] +name = "asynctest" +version = 
"0.13.0" +description = "Enhance the standard unittest package with features for testing asyncio libraries" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "atomicwrites" +version = "1.4.1" +description = "Atomic file writes." +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" + +[[package]] +name = "attrs" +version = "22.2.0" +description = "Classes Without Boilerplate" +category = "main" +optional = false +python-versions = ">=3.6" + +[package.extras] +cov = ["attrs[tests]", "coverage-enable-subprocess", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope.interface"] +tests = ["attrs[tests-no-zope]", "zope.interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests_no_zope = ["cloudpickle", "hypothesis", "mypy (>=0.971,<0.990)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] + +[[package]] +name = "certifi" +version = "2022.12.7" +description = "Python package for providing Mozilla's CA Bundle." +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "charset-normalizer" +version = "3.1.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.7.0" + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +category = "main" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" + +[[package]] +name = "coverage" +version = "7.2.1" +description = "Code coverage measurement for Python" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + +[[package]] +name = "filelock" +version = "3.10.0" +description = "A platform independent file lock." 
+category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo (>=2022.12.7)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.22,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.1)", "pytest (>=7.2.2)", "pytest-cov (>=4)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "frozenlist" +version = "1.3.3" +description = "A list-like structure which implements collections.abc.MutableSequence" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "huggingface-hub" +version = "0.13.2" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +category = "main" +optional = false +python-versions = ">=3.7.0" + +[package.dependencies] +filelock = "*" +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"] +torch = ["torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +category = "main" +optional = false +python-versions = ">=3.5" + +[[package]] +name = "importlib-metadata" +version = "6.0.0" +description = "Read metadata from Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "packaging" +version = "23.0" +description = "Core utilities for Python packages" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "pluggy" +version = "1.0.0" +description = "plugin and 
hook calling mechanisms for python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "py" +version = "1.11.0" +description = "library with cross-python path, ini-parsing, io, code, log facilities" +category = "dev" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" + +[[package]] +name = "pydantic" +version = "1.10.6" +description = "Data validation and settings management using python type hints" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "pytest" +version = "6.2.5" +description = "pytest: simple powerful testing with Python" +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} +attrs = ">=19.2.0" +colorama = {version = "*", markers = "sys_platform == \"win32\""} +importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +py = ">=1.8.2" +toml = "*" + +[package.extras] +testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "requests", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.17.2" +description = "Pytest support for asyncio" +category = "dev" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +pytest = ">=6.1.0" +typing-extensions = {version = ">=4.0", markers = "python_version < \"3.8\""} + +[package.extras] +testing = ["coverage (==6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (==0.931)"] + +[[package]] +name = "pytest-cov" +version = "3.0.0" +description = "Pytest plugin for measuring coverage." +category = "dev" +optional = false +python-versions = ">=3.6" + +[package.dependencies] +coverage = {version = ">=5.2.1", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "six", "virtualenv"] + +[[package]] +name = "PyYAML" +version = "6.0" +description = "YAML parser and emitter for Python" +category = "main" +optional = false +python-versions = ">=3.6" + +[[package]] +name = "requests" +version = "2.28.2" +description = "Python HTTP for Humans." 
+category = "main" +optional = false +python-versions = ">=3.7, <4" + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<1.27" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use_chardet_on_py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +category = "dev" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +category = "dev" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "typing-extensions" +version = "4.5.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +category = "main" +optional = false +python-versions = ">=3.7" + +[[package]] +name = "urllib3" +version = "1.26.15" +description = "HTTP library with thread-safe connection pooling, file post, and more." +category = "main" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "yarl" +version = "1.8.2" +description = "Yet another URL library" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +typing-extensions = {version = ">=3.7.4", markers = "python_version < \"3.8\""} + +[[package]] +name = "zipp" +version = "3.15.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.7" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] + +[metadata] +lock-version = "1.1" +python-versions = "^3.7" +content-hash = "0db2f97d52c557dd7f90c55b4ad5bbe308c957c5f7f99fec53c57e0a13822cb4" + +[metadata.files] +aiohttp = [ + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ce45967538fb747370308d3145aa68a074bdecb4f3a300869590f725ced69c1"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b744c33b6f14ca26b7544e8d8aadff6b765a80ad6164fb1a430bbadd593dfb1a"}, + {file = "aiohttp-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a45865451439eb320784918617ba54b7a377e3501fb70402ab84d38c2cd891b"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a86d42d7cba1cec432d47ab13b6637bee393a10f664c425ea7b305d1301ca1a3"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ee3c36df21b5714d49fc4580247947aa64bcbe2939d1b77b4c8dcb8f6c9faecc"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:176a64b24c0935869d5bbc4c96e82f89f643bcdf08ec947701b9dbb3c956b7dd"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c844fd628851c0bc309f3c801b3a3d58ce430b2ce5b359cd918a5a76d0b20cb5"}, + {file = "aiohttp-3.8.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5393fb786a9e23e4799fec788e7e735de18052f83682ce2dfcabaf1c00c2c08e"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e4b09863aae0dc965c3ef36500d891a3ff495a2ea9ae9171e4519963c12ceefd"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:adfbc22e87365a6e564c804c58fc44ff7727deea782d175c33602737b7feadb6"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:147ae376f14b55f4f3c2b118b95be50a369b89b38a971e80a17c3fd623f280c9"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:eafb3e874816ebe2a92f5e155f17260034c8c341dad1df25672fb710627c6949"}, + {file = "aiohttp-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c6cc15d58053c76eacac5fa9152d7d84b8d67b3fde92709195cb984cfb3475ea"}, + {file = "aiohttp-3.8.4-cp310-cp310-win32.whl", hash = "sha256:59f029a5f6e2d679296db7bee982bb3d20c088e52a2977e3175faf31d6fb75d1"}, + {file = "aiohttp-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:fe7ba4a51f33ab275515f66b0a236bcde4fb5561498fe8f898d4e549b2e4509f"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3d8ef1a630519a26d6760bc695842579cb09e373c5f227a21b67dc3eb16cfea4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b3f2e06a512e94722886c0827bee9807c86a9f698fac6b3aee841fab49bbfb4"}, + {file = "aiohttp-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3a80464982d41b1fbfe3154e440ba4904b71c1a53e9cd584098cd41efdb188ef"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b631e26df63e52f7cce0cce6507b7a7f1bc9b0c501fcde69742130b32e8782f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f43255086fe25e36fd5ed8f2ee47477408a73ef00e804cb2b5cba4bf2ac7f5e"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4d347a172f866cd1d93126d9b239fcbe682acb39b48ee0873c73c933dd23bd0f"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3fec6a4cb5551721cdd70473eb009d90935b4063acc5f40905d40ecfea23e05"}, + {file = "aiohttp-3.8.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80a37fe8f7c1e6ce8f2d9c411676e4bc633a8462844e38f46156d07a7d401654"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d1e6a862b76f34395a985b3cd39a0d949ca80a70b6ebdea37d3ab39ceea6698a"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd468460eefef601ece4428d3cf4562459157c0f6523db89365202c31b6daebb"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:618c901dd3aad4ace71dfa0f5e82e88b46ef57e3239fc7027773cb6d4ed53531"}, + {file = "aiohttp-3.8.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:652b1bff4f15f6287550b4670546a2947f2a4575b6c6dff7760eafb22eacbf0b"}, + {file = 
"aiohttp-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80575ba9377c5171407a06d0196b2310b679dc752d02a1fcaa2bc20b235dbf24"}, + {file = "aiohttp-3.8.4-cp311-cp311-win32.whl", hash = "sha256:bbcf1a76cf6f6dacf2c7f4d2ebd411438c275faa1dc0c68e46eb84eebd05dd7d"}, + {file = "aiohttp-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:6e74dd54f7239fcffe07913ff8b964e28b712f09846e20de78676ce2a3dc0bfc"}, + {file = "aiohttp-3.8.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:880e15bb6dad90549b43f796b391cfffd7af373f4646784795e20d92606b7a51"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb96fa6b56bb536c42d6a4a87dfca570ff8e52de2d63cabebfd6fb67049c34b6"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a6cadebe132e90cefa77e45f2d2f1a4b2ce5c6b1bfc1656c1ddafcfe4ba8131"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f352b62b45dff37b55ddd7b9c0c8672c4dd2eb9c0f9c11d395075a84e2c40f75"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ab43061a0c81198d88f39aaf90dae9a7744620978f7ef3e3708339b8ed2ef01"}, + {file = "aiohttp-3.8.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9cb1565a7ad52e096a6988e2ee0397f72fe056dadf75d17fa6b5aebaea05622"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:1b3ea7edd2d24538959c1c1abf97c744d879d4e541d38305f9bd7d9b10c9ec41"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:7c7837fe8037e96b6dd5cfcf47263c1620a9d332a87ec06a6ca4564e56bd0f36"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:3b90467ebc3d9fa5b0f9b6489dfb2c304a1db7b9946fa92aa76a831b9d587e99"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:cab9401de3ea52b4b4c6971db5fb5c999bd4260898af972bf23de1c6b5dd9d71"}, + {file = "aiohttp-3.8.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d1f9282c5f2b5e241034a009779e7b2a1aa045f667ff521e7948ea9b56e0c5ff"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win32.whl", hash = "sha256:5e14f25765a578a0a634d5f0cd1e2c3f53964553a00347998dfdf96b8137f777"}, + {file = "aiohttp-3.8.4-cp36-cp36m-win_amd64.whl", hash = "sha256:4c745b109057e7e5f1848c689ee4fb3a016c8d4d92da52b312f8a509f83aa05e"}, + {file = "aiohttp-3.8.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:aede4df4eeb926c8fa70de46c340a1bc2c6079e1c40ccf7b0eae1313ffd33519"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ddaae3f3d32fc2cb4c53fab020b69a05c8ab1f02e0e59665c6f7a0d3a5be54f"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4eb3b82ca349cf6fadcdc7abcc8b3a50ab74a62e9113ab7a8ebc268aad35bb9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9bcb89336efa095ea21b30f9e686763f2be4478f1b0a616969551982c4ee4c3b"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c08e8ed6fa3d477e501ec9db169bfac8140e830aa372d77e4a43084d8dd91ab"}, + {file = "aiohttp-3.8.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6cd05ea06daca6ad6a4ca3ba7fe7dc5b5de063ff4daec6170ec0f9979f6c332"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:b7a00a9ed8d6e725b55ef98b1b35c88013245f35f68b1b12c5cd4100dddac333"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:de04b491d0e5007ee1b63a309956eaed959a49f5bb4e84b26c8f5d49de140fa9"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:40653609b3bf50611356e6b6554e3a331f6879fa7116f3959b20e3528783e699"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dbf3a08a06b3f433013c143ebd72c15cac33d2914b8ea4bea7ac2c23578815d6"}, + {file = "aiohttp-3.8.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:854f422ac44af92bfe172d8e73229c270dc09b96535e8a548f99c84f82dde241"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win32.whl", hash = "sha256:aeb29c84bb53a84b1a81c6c09d24cf33bb8432cc5c39979021cc0f98c1292a1a"}, + {file = "aiohttp-3.8.4-cp37-cp37m-win_amd64.whl", hash = "sha256:db3fc6120bce9f446d13b1b834ea5b15341ca9ff3f335e4a951a6ead31105480"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fabb87dd8850ef0f7fe2b366d44b77d7e6fa2ea87861ab3844da99291e81e60f"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:91f6d540163f90bbaef9387e65f18f73ffd7c79f5225ac3d3f61df7b0d01ad15"}, + {file = "aiohttp-3.8.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d265f09a75a79a788237d7f9054f929ced2e69eb0bb79de3798c468d8a90f945"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d89efa095ca7d442a6d0cbc755f9e08190ba40069b235c9886a8763b03785da"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dac314662f4e2aa5009977b652d9b8db7121b46c38f2073bfeed9f4049732cd"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fe11310ae1e4cd560035598c3f29d86cef39a83d244c7466f95c27ae04850f10"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ddb2a2026c3f6a68c3998a6c47ab6795e4127315d2e35a09997da21865757f8"}, + {file = "aiohttp-3.8.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e75b89ac3bd27d2d043b234aa7b734c38ba1b0e43f07787130a0ecac1e12228a"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6e601588f2b502c93c30cd5a45bfc665faaf37bbe835b7cfd461753068232074"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a5d794d1ae64e7753e405ba58e08fcfa73e3fad93ef9b7e31112ef3c9a0efb52"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a1f4689c9a1462f3df0a1f7e797791cd6b124ddbee2b570d34e7f38ade0e2c71"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:3032dcb1c35bc330134a5b8a5d4f68c1a87252dfc6e1262c65a7e30e62298275"}, + {file = "aiohttp-3.8.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8189c56eb0ddbb95bfadb8f60ea1b22fcfa659396ea36f6adcc521213cd7b44d"}, + {file = "aiohttp-3.8.4-cp38-cp38-win32.whl", hash = "sha256:33587f26dcee66efb2fff3c177547bd0449ab7edf1b73a7f5dea1e38609a0c54"}, + {file = "aiohttp-3.8.4-cp38-cp38-win_amd64.whl", hash = "sha256:e595432ac259af2d4630008bf638873d69346372d38255774c0e286951e8b79f"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5a7bdf9e57126dc345b683c3632e8ba317c31d2a41acd5800c10640387d193ed"}, + {file = "aiohttp-3.8.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:22f6eab15b6db242499a16de87939a342f5a950ad0abaf1532038e2ce7d31567"}, + {file = 
"aiohttp-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7235604476a76ef249bd64cb8274ed24ccf6995c4a8b51a237005ee7a57e8643"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea9eb976ffdd79d0e893869cfe179a8f60f152d42cb64622fca418cd9b18dc2a"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:92c0cea74a2a81c4c76b62ea1cac163ecb20fb3ba3a75c909b9fa71b4ad493cf"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:493f5bc2f8307286b7799c6d899d388bbaa7dfa6c4caf4f97ef7521b9cb13719"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a63f03189a6fa7c900226e3ef5ba4d3bd047e18f445e69adbd65af433add5a2"}, + {file = "aiohttp-3.8.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10c8cefcff98fd9168cdd86c4da8b84baaa90bf2da2269c6161984e6737bf23e"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bca5f24726e2919de94f047739d0a4fc01372801a3672708260546aa2601bf57"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:03baa76b730e4e15a45f81dfe29a8d910314143414e528737f8589ec60cf7391"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8c29c77cc57e40f84acef9bfb904373a4e89a4e8b74e71aa8075c021ec9078c2"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:03543dcf98a6619254b409be2d22b51f21ec66272be4ebda7b04e6412e4b2e14"}, + {file = "aiohttp-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:17b79c2963db82086229012cff93ea55196ed31f6493bb1ccd2c62f1724324e4"}, + {file = "aiohttp-3.8.4-cp39-cp39-win32.whl", hash = "sha256:34ce9f93a4a68d1272d26030655dd1b58ff727b3ed2a33d80ec433561b03d67a"}, + {file = "aiohttp-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:41a86a69bb63bb2fc3dc9ad5ea9f10f1c9c8e282b471931be0268ddd09430b04"}, + {file = "aiohttp-3.8.4.tar.gz", hash = "sha256:bf2e1a9162c1e441bf805a1fd166e249d574ca04e03b34f97e2928769e91ab5c"}, +] +aiosignal = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] +async-timeout = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] +asynctest = [ + {file = "asynctest-0.13.0-py3-none-any.whl", hash = "sha256:5da6118a7e6d6b54d83a8f7197769d046922a44d2a99c21382f0a6e4fadae676"}, + {file = "asynctest-0.13.0.tar.gz", hash = "sha256:c27862842d15d83e6a34eb0b2866c323880eb3a75e4485b079ea11748fd77fac"}, +] +atomicwrites = [ + {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, +] +attrs = [ + {file = "attrs-22.2.0-py3-none-any.whl", hash = "sha256:29e95c7f6778868dbd49170f98f8818f78f3dc5e0e37c0b1f474e3561b240836"}, + {file = "attrs-22.2.0.tar.gz", hash = "sha256:c9227bfc2f01993c03f68db37d1d15c9690188323c067c641f1a35ca58185f99"}, +] +certifi = [ + {file = "certifi-2022.12.7-py3-none-any.whl", hash = "sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18"}, + {file = "certifi-2022.12.7.tar.gz", hash = 
"sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3"}, +] +charset-normalizer = [ + {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"}, + {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"}, + 
{file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"}, + {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"}, + {file = 
"charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"}, + {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"}, + {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"}, + {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"}, + {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, +] +colorama = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] +coverage = [ + {file = "coverage-7.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49567ec91fc5e0b15356da07a2feabb421d62f52a9fff4b1ec40e9e19772f5f8"}, + {file = "coverage-7.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d2ef6cae70168815ed91388948b5f4fcc69681480a0061114db737f957719f03"}, + {file = "coverage-7.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3004765bca3acd9e015794e5c2f0c9a05587f5e698127ff95e9cfba0d3f29339"}, + {file = "coverage-7.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cca7c0b7f5881dfe0291ef09ba7bb1582cb92ab0aeffd8afb00c700bf692415a"}, + {file = "coverage-7.2.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2167d116309f564af56f9aa5e75ef710ef871c5f9b313a83050035097b56820"}, + {file = "coverage-7.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cb5f152fb14857cbe7f3e8c9a5d98979c4c66319a33cad6e617f0067c9accdc4"}, + {file = "coverage-7.2.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:87dc37f16fb5e3a28429e094145bf7c1753e32bb50f662722e378c5851f7fdc6"}, + {file = "coverage-7.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e191a63a05851f8bce77bc875e75457f9b01d42843f8bd7feed2fc26bbe60833"}, + {file = "coverage-7.2.1-cp310-cp310-win32.whl", hash = 
"sha256:e3ea04b23b114572b98a88c85379e9e9ae031272ba1fb9b532aa934c621626d4"}, + {file = "coverage-7.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:0cf557827be7eca1c38a2480484d706693e7bb1929e129785fe59ec155a59de6"}, + {file = "coverage-7.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:570c21a29493b350f591a4b04c158ce1601e8d18bdcd21db136fbb135d75efa6"}, + {file = "coverage-7.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9e872b082b32065ac2834149dc0adc2a2e6d8203080501e1e3c3c77851b466f9"}, + {file = "coverage-7.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fac6343bae03b176e9b58104a9810df3cdccd5cfed19f99adfa807ffbf43cf9b"}, + {file = "coverage-7.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abacd0a738e71b20e224861bc87e819ef46fedba2fb01bc1af83dfd122e9c319"}, + {file = "coverage-7.2.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9256d4c60c4bbfec92721b51579c50f9e5062c21c12bec56b55292464873508"}, + {file = "coverage-7.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:80559eaf6c15ce3da10edb7977a1548b393db36cbc6cf417633eca05d84dd1ed"}, + {file = "coverage-7.2.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0bd7e628f6c3ec4e7d2d24ec0e50aae4e5ae95ea644e849d92ae4805650b4c4e"}, + {file = "coverage-7.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:09643fb0df8e29f7417adc3f40aaf379d071ee8f0350ab290517c7004f05360b"}, + {file = "coverage-7.2.1-cp311-cp311-win32.whl", hash = "sha256:1b7fb13850ecb29b62a447ac3516c777b0e7a09ecb0f4bb6718a8654c87dfc80"}, + {file = "coverage-7.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:617a94ada56bbfe547aa8d1b1a2b8299e2ec1ba14aac1d4b26a9f7d6158e1273"}, + {file = "coverage-7.2.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8649371570551d2fd7dee22cfbf0b61f1747cdfb2b7587bb551e4beaaa44cb97"}, + {file = "coverage-7.2.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d2b9b5e70a21474c105a133ba227c61bc95f2ac3b66861143ce39a5ea4b3f84"}, + {file = "coverage-7.2.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae82c988954722fa07ec5045c57b6d55bc1a0890defb57cf4a712ced65b26ddd"}, + {file = "coverage-7.2.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:861cc85dfbf55a7a768443d90a07e0ac5207704a9f97a8eb753292a7fcbdfcfc"}, + {file = "coverage-7.2.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:0339dc3237c0d31c3b574f19c57985fcbe494280153bbcad33f2cdf469f4ac3e"}, + {file = "coverage-7.2.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:5928b85416a388dd557ddc006425b0c37e8468bd1c3dc118c1a3de42f59e2a54"}, + {file = "coverage-7.2.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8d3843ca645f62c426c3d272902b9de90558e9886f15ddf5efe757b12dd376f5"}, + {file = "coverage-7.2.1-cp37-cp37m-win32.whl", hash = "sha256:6a034480e9ebd4e83d1aa0453fd78986414b5d237aea89a8fdc35d330aa13bae"}, + {file = "coverage-7.2.1-cp37-cp37m-win_amd64.whl", hash = "sha256:6fce673f79a0e017a4dc35e18dc7bb90bf6d307c67a11ad5e61ca8d42b87cbff"}, + {file = "coverage-7.2.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7f099da6958ddfa2ed84bddea7515cb248583292e16bb9231d151cd528eab657"}, + {file = "coverage-7.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:97a3189e019d27e914ecf5c5247ea9f13261d22c3bb0cfcfd2a9b179bb36f8b1"}, + {file = 
"coverage-7.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a81dbcf6c6c877986083d00b834ac1e84b375220207a059ad45d12f6e518a4e3"}, + {file = "coverage-7.2.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d2c3dde4c0b9be4b02067185136b7ee4681978228ad5ec1278fa74f5ca3e99"}, + {file = "coverage-7.2.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a209d512d157379cc9ab697cbdbb4cfd18daa3e7eebaa84c3d20b6af0037384"}, + {file = "coverage-7.2.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f3d07edb912a978915576a776756069dede66d012baa503022d3a0adba1b6afa"}, + {file = "coverage-7.2.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:8dca3c1706670297851bca1acff9618455122246bdae623be31eca744ade05ec"}, + {file = "coverage-7.2.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b1991a6d64231a3e5bbe3099fb0dd7c9aeaa4275ad0e0aeff4cb9ef885c62ba2"}, + {file = "coverage-7.2.1-cp38-cp38-win32.whl", hash = "sha256:22c308bc508372576ffa3d2dbc4824bb70d28eeb4fcd79d4d1aed663a06630d0"}, + {file = "coverage-7.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:b0c0d46de5dd97f6c2d1b560bf0fcf0215658097b604f1840365296302a9d1fb"}, + {file = "coverage-7.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4dd34a935de268a133e4741827ae951283a28c0125ddcdbcbba41c4b98f2dfef"}, + {file = "coverage-7.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0f8318ed0f3c376cfad8d3520f496946977abde080439d6689d7799791457454"}, + {file = "coverage-7.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:834c2172edff5a08d78e2f53cf5e7164aacabeb66b369f76e7bb367ca4e2d993"}, + {file = "coverage-7.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4d70c853f0546855f027890b77854508bdb4d6a81242a9d804482e667fff6e6"}, + {file = "coverage-7.2.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a6450da4c7afc4534305b2b7d8650131e130610cea448ff240b6ab73d7eab63"}, + {file = "coverage-7.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:99f4dd81b2bb8fc67c3da68b1f5ee1650aca06faa585cbc6818dbf67893c6d58"}, + {file = "coverage-7.2.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bdd3f2f285ddcf2e75174248b2406189261a79e7fedee2ceeadc76219b6faa0e"}, + {file = "coverage-7.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f29351393eb05e6326f044a7b45ed8e38cb4dcc38570d12791f271399dc41431"}, + {file = "coverage-7.2.1-cp39-cp39-win32.whl", hash = "sha256:e2b50ebc2b6121edf352336d503357321b9d8738bb7a72d06fc56153fd3f4cd8"}, + {file = "coverage-7.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:bd5a12239c0006252244f94863f1c518ac256160cd316ea5c47fb1a11b25889a"}, + {file = "coverage-7.2.1-pp37.pp38.pp39-none-any.whl", hash = "sha256:436313d129db7cf5b4ac355dd2bd3f7c7e5294af077b090b85de75f8458b8616"}, + {file = "coverage-7.2.1.tar.gz", hash = "sha256:c77f2a9093ccf329dd523a9b2b3c854c20d2a3d968b6def3b820272ca6732242"}, +] +filelock = [ + {file = "filelock-3.10.0-py3-none-any.whl", hash = "sha256:e90b34656470756edf8b19656785c5fea73afa1953f3e1b0d645cef11cab3182"}, + {file = "filelock-3.10.0.tar.gz", hash = "sha256:3199fd0d3faea8b911be52b663dfccceb84c95949dd13179aa21436d1a79c4ce"}, +] +frozenlist = [ + {file = "frozenlist-1.3.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff8bf625fe85e119553b5383ba0fb6aa3d0ec2ae980295aaefa552374926b3f4"}, + {file = 
"frozenlist-1.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dfbac4c2dfcc082fcf8d942d1e49b6aa0766c19d3358bd86e2000bf0fa4a9cf0"}, + {file = "frozenlist-1.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b1c63e8d377d039ac769cd0926558bb7068a1f7abb0f003e3717ee003ad85530"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7fdfc24dcfce5b48109867c13b4cb15e4660e7bd7661741a391f821f23dfdca7"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c926450857408e42f0bbc295e84395722ce74bae69a3b2aa2a65fe22cb14b99"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1841e200fdafc3d51f974d9d377c079a0694a8f06de2e67b48150328d66d5483"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f470c92737afa7d4c3aacc001e335062d582053d4dbe73cda126f2d7031068dd"}, + {file = "frozenlist-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:783263a4eaad7c49983fe4b2e7b53fa9770c136c270d2d4bbb6d2192bf4d9caf"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:924620eef691990dfb56dc4709f280f40baee568c794b5c1885800c3ecc69816"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ae4dc05c465a08a866b7a1baf360747078b362e6a6dbeb0c57f234db0ef88ae0"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:bed331fe18f58d844d39ceb398b77d6ac0b010d571cba8267c2e7165806b00ce"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:02c9ac843e3390826a265e331105efeab489ffaf4dd86384595ee8ce6d35ae7f"}, + {file = "frozenlist-1.3.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9545a33965d0d377b0bc823dcabf26980e77f1b6a7caa368a365a9497fb09420"}, + {file = "frozenlist-1.3.3-cp310-cp310-win32.whl", hash = "sha256:d5cd3ab21acbdb414bb6c31958d7b06b85eeb40f66463c264a9b343a4e238642"}, + {file = "frozenlist-1.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b756072364347cb6aa5b60f9bc18e94b2f79632de3b0190253ad770c5df17db1"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:b4395e2f8d83fbe0c627b2b696acce67868793d7d9750e90e39592b3626691b7"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14143ae966a6229350021384870458e4777d1eae4c28d1a7aa47f24d030e6678"}, + {file = "frozenlist-1.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5d8860749e813a6f65bad8285a0520607c9500caa23fea6ee407e63debcdbef6"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23d16d9f477bb55b6154654e0e74557040575d9d19fe78a161bd33d7d76808e8"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb82dbba47a8318e75f679690190c10a5e1f447fbf9df41cbc4c3afd726d88cb"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9309869032abb23d196cb4e4db574232abe8b8be1339026f489eeb34a4acfd91"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a97b4fe50b5890d36300820abd305694cb865ddb7885049587a5678215782a6b"}, + {file = "frozenlist-1.3.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:c188512b43542b1e91cadc3c6c915a82a5eb95929134faf7fd109f14f9892ce4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:303e04d422e9b911a09ad499b0368dc551e8c3cd15293c99160c7f1f07b59a48"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0771aed7f596c7d73444c847a1c16288937ef988dc04fb9f7be4b2aa91db609d"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:66080ec69883597e4d026f2f71a231a1ee9887835902dbe6b6467d5a89216cf6"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:41fe21dc74ad3a779c3d73a2786bdf622ea81234bdd4faf90b8b03cad0c2c0b4"}, + {file = "frozenlist-1.3.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f20380df709d91525e4bee04746ba612a4df0972c1b8f8e1e8af997e678c7b81"}, + {file = "frozenlist-1.3.3-cp311-cp311-win32.whl", hash = "sha256:f30f1928162e189091cf4d9da2eac617bfe78ef907a761614ff577ef4edfb3c8"}, + {file = "frozenlist-1.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:a6394d7dadd3cfe3f4b3b186e54d5d8504d44f2d58dcc89d693698e8b7132b32"}, + {file = "frozenlist-1.3.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8df3de3a9ab8325f94f646609a66cbeeede263910c5c0de0101079ad541af332"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0693c609e9742c66ba4870bcee1ad5ff35462d5ffec18710b4ac89337ff16e27"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd4210baef299717db0a600d7a3cac81d46ef0e007f88c9335db79f8979c0d3d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:394c9c242113bfb4b9aa36e2b80a05ffa163a30691c7b5a29eba82e937895d5e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6327eb8e419f7d9c38f333cde41b9ae348bec26d840927332f17e887a8dcb70d"}, + {file = "frozenlist-1.3.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e24900aa13212e75e5b366cb9065e78bbf3893d4baab6052d1aca10d46d944c"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3843f84a6c465a36559161e6c59dce2f2ac10943040c2fd021cfb70d58c4ad56"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:84610c1502b2461255b4c9b7d5e9c48052601a8957cd0aea6ec7a7a1e1fb9420"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:c21b9aa40e08e4f63a2f92ff3748e6b6c84d717d033c7b3438dd3123ee18f70e"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:efce6ae830831ab6a22b9b4091d411698145cb9b8fc869e1397ccf4b4b6455cb"}, + {file = "frozenlist-1.3.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:40de71985e9042ca00b7953c4f41eabc3dc514a2d1ff534027f091bc74416401"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win32.whl", hash = "sha256:180c00c66bde6146a860cbb81b54ee0df350d2daf13ca85b275123bbf85de18a"}, + {file = "frozenlist-1.3.3-cp37-cp37m-win_amd64.whl", hash = "sha256:9bbbcedd75acdfecf2159663b87f1bb5cfc80e7cd99f7ddd9d66eb98b14a8411"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:034a5c08d36649591be1cbb10e09da9f531034acfe29275fc5454a3b101ce41a"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ba64dc2b3b7b158c6660d49cdb1d872d1d0bf4e42043ad8d5006099479a194e5"}, + {file = "frozenlist-1.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:47df36a9fe24054b950bbc2db630d508cca3aa27ed0566c0baf661225e52c18e"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:008a054b75d77c995ea26629ab3a0c0d7281341f2fa7e1e85fa6153ae29ae99c"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:841ea19b43d438a80b4de62ac6ab21cfe6827bb8a9dc62b896acc88eaf9cecba"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e235688f42b36be2b6b06fc37ac2126a73b75fb8d6bc66dd632aa35286238703"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca713d4af15bae6e5d79b15c10c8522859a9a89d3b361a50b817c98c2fb402a2"}, + {file = "frozenlist-1.3.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ac5995f2b408017b0be26d4a1d7c61bce106ff3d9e3324374d66b5964325448"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4ae8135b11652b08a8baf07631d3ebfe65a4c87909dbef5fa0cdde440444ee4"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4ea42116ceb6bb16dbb7d526e242cb6747b08b7710d9782aa3d6732bd8d27649"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:810860bb4bdce7557bc0febb84bbd88198b9dbc2022d8eebe5b3590b2ad6c842"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ee78feb9d293c323b59a6f2dd441b63339a30edf35abcb51187d2fc26e696d13"}, + {file = "frozenlist-1.3.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:0af2e7c87d35b38732e810befb9d797a99279cbb85374d42ea61c1e9d23094b3"}, + {file = "frozenlist-1.3.3-cp38-cp38-win32.whl", hash = "sha256:899c5e1928eec13fd6f6d8dc51be23f0d09c5281e40d9cf4273d188d9feeaf9b"}, + {file = "frozenlist-1.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:7f44e24fa70f6fbc74aeec3e971f60a14dde85da364aa87f15d1be94ae75aeef"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2b07ae0c1edaa0a36339ec6cce700f51b14a3fc6545fdd32930d2c83917332cf"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ebb86518203e12e96af765ee89034a1dbb0c3c65052d1b0c19bbbd6af8a145e1"}, + {file = "frozenlist-1.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5cf820485f1b4c91e0417ea0afd41ce5cf5965011b3c22c400f6d144296ccbc0"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c11e43016b9024240212d2a65043b70ed8dfd3b52678a1271972702d990ac6d"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fa3c6e3305aa1146b59a09b32b2e04074945ffcfb2f0931836d103a2c38f936"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:352bd4c8c72d508778cf05ab491f6ef36149f4d0cb3c56b1b4302852255d05d5"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:65a5e4d3aa679610ac6e3569e865425b23b372277f89b5ef06cf2cdaf1ebf22b"}, + {file = "frozenlist-1.3.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e2c1185858d7e10ff045c496bbf90ae752c28b365fef2c09cf0fa309291669"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f163d2fd041c630fed01bc48d28c3ed4a3b003c00acd396900e11ee5316b56bb"}, + {file = 
"frozenlist-1.3.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:05cdb16d09a0832eedf770cb7bd1fe57d8cf4eaf5aced29c4e41e3f20b30a784"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:8bae29d60768bfa8fb92244b74502b18fae55a80eac13c88eb0b496d4268fd2d"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eedab4c310c0299961ac285591acd53dc6723a1ebd90a57207c71f6e0c2153ab"}, + {file = "frozenlist-1.3.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3bbdf44855ed8f0fbcd102ef05ec3012d6a4fd7c7562403f76ce6a52aeffb2b1"}, + {file = "frozenlist-1.3.3-cp39-cp39-win32.whl", hash = "sha256:efa568b885bca461f7c7b9e032655c0c143d305bf01c30caf6db2854a4532b38"}, + {file = "frozenlist-1.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfe33efc9cb900a4c46f91a5ceba26d6df370ffddd9ca386eb1d4f0ad97b9ea9"}, + {file = "frozenlist-1.3.3.tar.gz", hash = "sha256:58bcc55721e8a90b88332d6cd441261ebb22342e238296bb330968952fbb3a6a"}, +] +huggingface-hub = [ + {file = "huggingface_hub-0.13.2-py3-none-any.whl", hash = "sha256:745c4cbd97a27fc5c1c6c89cb477662004c88bc3dd89bafc1a27ef24af77f944"}, + {file = "huggingface_hub-0.13.2.tar.gz", hash = "sha256:246e8eb39b6e6e9d9d5846e4b56c265cdf1872f48ba5a13a1321295d371626f5"}, +] +idna = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] +importlib-metadata = [ + {file = "importlib_metadata-6.0.0-py3-none-any.whl", hash = "sha256:7efb448ec9a5e313a57655d35aa54cd3e01b7e1fbcf72dce1bf06119420f5bad"}, + {file = "importlib_metadata-6.0.0.tar.gz", hash = "sha256:e354bedeb60efa6affdcc8ae121b73544a7aa74156d047311948f6d711cd378d"}, +] +iniconfig = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] +multidict = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + 
{file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] +packaging = [ + {file = "packaging-23.0-py3-none-any.whl", hash = "sha256:714ac14496c3e68c99c29b00845f7a2b85f3bb6f1078fd9f72fd20f0570002b2"}, + {file = "packaging-23.0.tar.gz", hash = "sha256:b6ad297f8907de0fa2fe1ccbd26fdaf387f5f47c7275fedf8cce89f99446cf97"}, +] +pluggy = [ + {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, + {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, +] +py = [ + {file = 
"py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, + {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, +] +pydantic = [ + {file = "pydantic-1.10.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9289065611c48147c1dd1fd344e9d57ab45f1d99b0fb26c51f1cf72cd9bcd31"}, + {file = "pydantic-1.10.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c32b6bba301490d9bb2bf5f631907803135e8085b6aa3e5fe5a770d46dd0160"}, + {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd9b9e98068fa1068edfc9eabde70a7132017bdd4f362f8b4fd0abed79c33083"}, + {file = "pydantic-1.10.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c84583b9df62522829cbc46e2b22e0ec11445625b5acd70c5681ce09c9b11c4"}, + {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b41822064585fea56d0116aa431fbd5137ce69dfe837b599e310034171996084"}, + {file = "pydantic-1.10.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:61f1f08adfaa9cc02e0cbc94f478140385cbd52d5b3c5a657c2fceb15de8d1fb"}, + {file = "pydantic-1.10.6-cp310-cp310-win_amd64.whl", hash = "sha256:32937835e525d92c98a1512218db4eed9ddc8f4ee2a78382d77f54341972c0e7"}, + {file = "pydantic-1.10.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bbd5c531b22928e63d0cb1868dee76123456e1de2f1cb45879e9e7a3f3f1779b"}, + {file = "pydantic-1.10.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e277bd18339177daa62a294256869bbe84df1fb592be2716ec62627bb8d7c81d"}, + {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f15277d720aa57e173954d237628a8d304896364b9de745dcb722f584812c7"}, + {file = "pydantic-1.10.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b243b564cea2576725e77aeeda54e3e0229a168bc587d536cd69941e6797543d"}, + {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3ce13a558b484c9ae48a6a7c184b1ba0e5588c5525482681db418268e5f86186"}, + {file = "pydantic-1.10.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3ac1cd4deed871dfe0c5f63721e29debf03e2deefa41b3ed5eb5f5df287c7b70"}, + {file = "pydantic-1.10.6-cp311-cp311-win_amd64.whl", hash = "sha256:b1eb6610330a1dfba9ce142ada792f26bbef1255b75f538196a39e9e90388bf4"}, + {file = "pydantic-1.10.6-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4ca83739c1263a044ec8b79df4eefc34bbac87191f0a513d00dd47d46e307a65"}, + {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea4e2a7cb409951988e79a469f609bba998a576e6d7b9791ae5d1e0619e1c0f2"}, + {file = "pydantic-1.10.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53de12b4608290992a943801d7756f18a37b7aee284b9ffa794ee8ea8153f8e2"}, + {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:60184e80aac3b56933c71c48d6181e630b0fbc61ae455a63322a66a23c14731a"}, + {file = "pydantic-1.10.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:415a3f719ce518e95a92effc7ee30118a25c3d032455d13e121e3840985f2efd"}, + {file = "pydantic-1.10.6-cp37-cp37m-win_amd64.whl", hash = "sha256:72cb30894a34d3a7ab6d959b45a70abac8a2a93b6480fc5a7bfbd9c935bdc4fb"}, + {file = "pydantic-1.10.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3091d2eaeda25391405e36c2fc2ed102b48bac4b384d42b2267310abae350ca6"}, + {file = 
"pydantic-1.10.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:751f008cd2afe812a781fd6aa2fb66c620ca2e1a13b6a2152b1ad51553cb4b77"}, + {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12e837fd320dd30bd625be1b101e3b62edc096a49835392dcf418f1a5ac2b832"}, + {file = "pydantic-1.10.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d92831d0115874d766b1f5fddcdde0c5b6c60f8c6111a394078ec227fca6d"}, + {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:476f6674303ae7965730a382a8e8d7fae18b8004b7b69a56c3d8fa93968aa21c"}, + {file = "pydantic-1.10.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3a2be0a0f32c83265fd71a45027201e1278beaa82ea88ea5b345eea6afa9ac7f"}, + {file = "pydantic-1.10.6-cp38-cp38-win_amd64.whl", hash = "sha256:0abd9c60eee6201b853b6c4be104edfba4f8f6c5f3623f8e1dba90634d63eb35"}, + {file = "pydantic-1.10.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6195ca908045054dd2d57eb9c39a5fe86409968b8040de8c2240186da0769da7"}, + {file = "pydantic-1.10.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:43cdeca8d30de9a897440e3fb8866f827c4c31f6c73838e3a01a14b03b067b1d"}, + {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c19eb5163167489cb1e0161ae9220dadd4fc609a42649e7e84a8fa8fff7a80f"}, + {file = "pydantic-1.10.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:012c99a9c0d18cfde7469aa1ebff922e24b0c706d03ead96940f5465f2c9cf62"}, + {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:528dcf7ec49fb5a84bf6fe346c1cc3c55b0e7603c2123881996ca3ad79db5bfc"}, + {file = "pydantic-1.10.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:163e79386c3547c49366e959d01e37fc30252285a70619ffc1b10ede4758250a"}, + {file = "pydantic-1.10.6-cp39-cp39-win_amd64.whl", hash = "sha256:189318051c3d57821f7233ecc94708767dd67687a614a4e8f92b4a020d4ffd06"}, + {file = "pydantic-1.10.6-py3-none-any.whl", hash = "sha256:acc6783751ac9c9bc4680379edd6d286468a1dc8d7d9906cd6f1186ed682b2b0"}, + {file = "pydantic-1.10.6.tar.gz", hash = "sha256:cf95adb0d1671fc38d8c43dd921ad5814a735e7d9b4d9e437c088002863854fd"}, +] +pytest = [ + {file = "pytest-6.2.5-py3-none-any.whl", hash = "sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134"}, + {file = "pytest-6.2.5.tar.gz", hash = "sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89"}, +] +pytest-asyncio = [ + {file = "pytest-asyncio-0.17.2.tar.gz", hash = "sha256:6d895b02432c028e6957d25fc936494e78c6305736e785d9fee408b1efbc7ff4"}, + {file = "pytest_asyncio-0.17.2-py3-none-any.whl", hash = "sha256:e0fe5dbea40516b661ef1bcfe0bd9461c2847c4ef4bb40012324f2454fb7d56d"}, +] +pytest-cov = [ + {file = "pytest-cov-3.0.0.tar.gz", hash = "sha256:e7f0f5b1617d2210a2cabc266dfe2f4c75a8d32fb89eafb7ad9d06f6d076d470"}, + {file = "pytest_cov-3.0.0-py3-none-any.whl", hash = "sha256:578d5d15ac4a25e5f961c938b85a05b09fdaae9deef3bb6de9a6e766622ca7a6"}, +] +PyYAML = [ + {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"}, + {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"}, + {file = 
"PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"}, + {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"}, + {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"}, + {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"}, + {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"}, + {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"}, + {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"}, + {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"}, + {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"}, + {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"}, + {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"}, + {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"}, + {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"}, + {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"}, + {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"}, + {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"}, + {file = 
"PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"}, + {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"}, + {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"}, + {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"}, + {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"}, + {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"}, + {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"}, + {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, + {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, +] +requests = [ + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, +] +toml = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] +tomli = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] +tqdm = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] +typing-extensions = [ + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, +] +urllib3 = [ + {file = "urllib3-1.26.15-py2.py3-none-any.whl", hash = 
"sha256:aa751d169e23c7479ce47a0cb0da579e3ede798f994f5816a74e4f4500dcea42"}, + {file = "urllib3-1.26.15.tar.gz", hash = "sha256:8a388717b9476f934a21484e8c8e61875ab60644d29b9b39e11e4b9dc1c6b305"}, +] +yarl = [ + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"}, + {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"}, + {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"}, + {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"}, + {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"}, + {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"}, + {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"}, + {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"}, + {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"}, + {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"}, + {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"}, + {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"}, + {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"}, + {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"}, + {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"}, + {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"}, + {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"}, + {file = 
"yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"}, + {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"}, + {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"}, + {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"}, + {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"}, + {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"}, + {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"}, + {file = 
"yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"}, + {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"}, + {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"}, + {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"}, + {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"}, +] +zipp = [ + {file = "zipp-3.15.0-py3-none-any.whl", hash = "sha256:48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556"}, + {file = "zipp-3.15.0.tar.gz", hash = "sha256:112929ad649da941c23de50f356a2b5570c954b65150642bccdd66bf194d224b"}, +] diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml new file mode 100644 index 00000000..2aa396b8 --- /dev/null +++ b/clients/python/pyproject.toml @@ -0,0 +1,26 @@ +[tool.poetry] +name = "text-generation" +version = "0.3.1" +description = "Hugging Face Text Generation Python Client" +license = "Apache-2.0" +authors = ["Olivier Dehaene "] +maintainers = ["Olivier Dehaene "] +readme = "README.md" +homepage = "https://github.com/huggingface/text-generation-inference" +repository = "https://github.com/huggingface/text-generation-inference" + + +[tool.poetry.dependencies] +python = "^3.7" +pydantic = "^1.10" +aiohttp = "^3.8" +huggingface-hub = ">= 0.12, < 1.0" + +[tool.poetry.dev-dependencies] +pytest = "^6.2.5" +pytest-asyncio = "^0.17.2" +pytest-cov = "^3.0.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py new file mode 100644 index 00000000..48734f0d --- /dev/null +++ b/clients/python/tests/conftest.py @@ -0,0 +1,51 @@ +import pytest + +from text_generation import __version__ +from huggingface_hub.utils import build_hf_headers + + +@pytest.fixture +def flan_t5_xxl(): + return "google/flan-t5-xxl" + + +@pytest.fixture +def fake_model(): + return "fake/model" + + +@pytest.fixture +def unsupported_model(): + return "gpt2" + + +@pytest.fixture +def base_url(): + return "https://api-inference.huggingface.co/models" + + +@pytest.fixture +def bloom_url(base_url, bloom_model): + return f"{base_url}/{bloom_model}" + + +@pytest.fixture +def flan_t5_xxl_url(base_url, flan_t5_xxl): + return f"{base_url}/{flan_t5_xxl}" + + +@pytest.fixture +def fake_url(base_url, fake_model): + return f"{base_url}/{fake_model}" + + +@pytest.fixture +def unsupported_url(base_url, unsupported_model): + return f"{base_url}/{unsupported_model}" + + +@pytest.fixture(scope="session") +def hf_headers(): + return build_hf_headers( + library_name="text-generation-tests", library_version=__version__ + ) diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py new file mode 100644 index 00000000..c998de41 --- /dev/null +++ b/clients/python/tests/test_client.py @@ -0,0 +1,133 @@ +import pytest + +from text_generation import Client, AsyncClient +from text_generation.errors import NotFoundError, ValidationError +from text_generation.types import FinishReason, PrefillToken, Token + + +def test_generate(flan_t5_xxl_url, hf_headers): + 
client = Client(flan_t5_xxl_url, hf_headers) + response = client.generate("test", max_new_tokens=1) + + assert response.generated_text == "" + assert response.details.finish_reason == FinishReason.Length + assert response.details.generated_tokens == 1 + assert response.details.seed is None + assert len(response.details.prefill) == 1 + assert response.details.prefill[0] == PrefillToken(id=0, text="", logprob=None) + assert len(response.details.tokens) == 1 + assert response.details.tokens[0] == Token( + id=3, text=" ", logprob=-1.984375, special=False + ) + + +def test_generate_best_of(flan_t5_xxl_url, hf_headers): + client = Client(flan_t5_xxl_url, hf_headers) + response = client.generate("test", max_new_tokens=1, best_of=2, do_sample=True) + + assert response.details.seed is not None + assert response.details.best_of_sequences is not None + assert len(response.details.best_of_sequences) == 1 + assert response.details.best_of_sequences[0].seed is not None + + +def test_generate_not_found(fake_url, hf_headers): + client = Client(fake_url, hf_headers) + with pytest.raises(NotFoundError): + client.generate("test") + + +def test_generate_validation_error(flan_t5_xxl_url, hf_headers): + client = Client(flan_t5_xxl_url, hf_headers) + with pytest.raises(ValidationError): + client.generate("test", max_new_tokens=10_000) + + +def test_generate_stream(flan_t5_xxl_url, hf_headers): + client = Client(flan_t5_xxl_url, hf_headers) + responses = [ + response for response in client.generate_stream("test", max_new_tokens=1) + ] + + assert len(responses) == 1 + response = responses[0] + + assert response.generated_text == "" + assert response.details.finish_reason == FinishReason.Length + assert response.details.generated_tokens == 1 + assert response.details.seed is None + + +def test_generate_stream_not_found(fake_url, hf_headers): + client = Client(fake_url, hf_headers) + with pytest.raises(NotFoundError): + list(client.generate_stream("test")) + + +def test_generate_stream_validation_error(flan_t5_xxl_url, hf_headers): + client = Client(flan_t5_xxl_url, hf_headers) + with pytest.raises(ValidationError): + list(client.generate_stream("test", max_new_tokens=10_000)) + + +@pytest.mark.asyncio +async def test_generate_async(flan_t5_xxl_url, hf_headers): + client = AsyncClient(flan_t5_xxl_url, hf_headers) + response = await client.generate("test", max_new_tokens=1) + + assert response.generated_text == "" + assert response.details.finish_reason == FinishReason.Length + assert response.details.generated_tokens == 1 + assert response.details.seed is None + assert len(response.details.prefill) == 1 + assert response.details.prefill[0] == PrefillToken(id=0, text="", logprob=None) + assert len(response.details.tokens) == 1 + assert response.details.tokens[0] == Token( + id=3, text=" ", logprob=-1.984375, special=False + ) + + +@pytest.mark.asyncio +async def test_generate_async_not_found(fake_url, hf_headers): + client = AsyncClient(fake_url, hf_headers) + with pytest.raises(NotFoundError): + await client.generate("test") + + +@pytest.mark.asyncio +async def test_generate_async_validation_error(flan_t5_xxl_url, hf_headers): + client = AsyncClient(flan_t5_xxl_url, hf_headers) + with pytest.raises(ValidationError): + await client.generate("test", max_new_tokens=10_000) + + +@pytest.mark.asyncio +async def test_generate_stream_async(flan_t5_xxl_url, hf_headers): + client = AsyncClient(flan_t5_xxl_url, hf_headers) + responses = [ + response async for response in client.generate_stream("test", max_new_tokens=1) + ] + 
+ assert len(responses) == 1 + response = responses[0] + + assert response.generated_text == "" + assert response.details.finish_reason == FinishReason.Length + assert response.details.generated_tokens == 1 + assert response.details.seed is None + + +@pytest.mark.asyncio +async def test_generate_stream_async_not_found(fake_url, hf_headers): + client = AsyncClient(fake_url, hf_headers) + with pytest.raises(NotFoundError): + async for _ in client.generate_stream("test"): + pass + + +@pytest.mark.asyncio +async def test_generate_stream_async_validation_error(flan_t5_xxl_url, hf_headers): + client = AsyncClient(flan_t5_xxl_url, hf_headers) + with pytest.raises(ValidationError): + async for _ in client.generate_stream("test", max_new_tokens=10_000): + pass diff --git a/clients/python/tests/test_errors.py b/clients/python/tests/test_errors.py new file mode 100644 index 00000000..8389ed31 --- /dev/null +++ b/clients/python/tests/test_errors.py @@ -0,0 +1,64 @@ +from text_generation.errors import ( + parse_error, + GenerationError, + IncompleteGenerationError, + OverloadedError, + ValidationError, + BadRequestError, + ShardNotReadyError, + ShardTimeoutError, + NotFoundError, + RateLimitExceededError, + UnknownError, +) + + +def test_generation_error(): + payload = {"error_type": "generation", "error": "test"} + assert isinstance(parse_error(400, payload), GenerationError) + + +def test_incomplete_generation_error(): + payload = {"error_type": "incomplete_generation", "error": "test"} + assert isinstance(parse_error(400, payload), IncompleteGenerationError) + + +def test_overloaded_error(): + payload = {"error_type": "overloaded", "error": "test"} + assert isinstance(parse_error(400, payload), OverloadedError) + + +def test_validation_error(): + payload = {"error_type": "validation", "error": "test"} + assert isinstance(parse_error(400, payload), ValidationError) + + +def test_bad_request_error(): + payload = {"error": "test"} + assert isinstance(parse_error(400, payload), BadRequestError) + + +def test_shard_not_ready_error(): + payload = {"error": "test"} + assert isinstance(parse_error(403, payload), ShardNotReadyError) + assert isinstance(parse_error(424, payload), ShardNotReadyError) + + +def test_shard_timeout_error(): + payload = {"error": "test"} + assert isinstance(parse_error(504, payload), ShardTimeoutError) + + +def test_not_found_error(): + payload = {"error": "test"} + assert isinstance(parse_error(404, payload), NotFoundError) + + +def test_rate_limit_exceeded_error(): + payload = {"error": "test"} + assert isinstance(parse_error(429, payload), RateLimitExceededError) + + +def test_unknown_error(): + payload = {"error": "test"} + assert isinstance(parse_error(500, payload), UnknownError) diff --git a/clients/python/tests/test_inference_api.py b/clients/python/tests/test_inference_api.py new file mode 100644 index 00000000..79e503a3 --- /dev/null +++ b/clients/python/tests/test_inference_api.py @@ -0,0 +1,34 @@ +import pytest + +from text_generation import ( + InferenceAPIClient, + InferenceAPIAsyncClient, + Client, + AsyncClient, +) +from text_generation.errors import NotSupportedError +from text_generation.inference_api import get_supported_models + + +def test_get_supported_models(): + assert isinstance(get_supported_models(), list) + + +def test_client(flan_t5_xxl): + client = InferenceAPIClient(flan_t5_xxl) + assert isinstance(client, Client) + + +def test_client_unsupported_model(unsupported_model): + with pytest.raises(NotSupportedError): + InferenceAPIClient(unsupported_model) 
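For reference, a minimal sketch of how a caller is expected to lean on the `parse_error` mapping exercised in test_errors.py above; only `parse_error` and the error classes come from the library, the wrapper function name is illustrative:

from text_generation.errors import parse_error

def raise_if_failed(status_code: int, payload: dict) -> None:
    # parse_error returns the typed exception rather than raising it,
    # so the caller decides when to raise
    if status_code != 200:
        raise parse_error(status_code, payload)

try:
    raise_if_failed(429, {"error": "test"})
except Exception as exc:
    print(type(exc).__name__)  # RateLimitExceededError, as in the test above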
+ + +def test_async_client(flan_t5_xxl): + client = InferenceAPIAsyncClient(flan_t5_xxl) + assert isinstance(client, AsyncClient) + + +def test_async_client_unsupported_model(unsupported_model): + with pytest.raises(NotSupportedError): + InferenceAPIAsyncClient(unsupported_model) diff --git a/clients/python/tests/test_types.py b/clients/python/tests/test_types.py new file mode 100644 index 00000000..4c9d4c89 --- /dev/null +++ b/clients/python/tests/test_types.py @@ -0,0 +1,82 @@ +import pytest + +from text_generation.types import Parameters, Request +from text_generation.errors import ValidationError + + +def test_parameters_validation(): + # Test best_of + Parameters(best_of=1) + with pytest.raises(ValidationError): + Parameters(best_of=0) + with pytest.raises(ValidationError): + Parameters(best_of=-1) + Parameters(best_of=2, do_sample=True) + with pytest.raises(ValidationError): + Parameters(best_of=2) + + # Test repetition_penalty + Parameters(repetition_penalty=1) + with pytest.raises(ValidationError): + Parameters(repetition_penalty=0) + with pytest.raises(ValidationError): + Parameters(repetition_penalty=-1) + + # Test seed + Parameters(seed=1) + with pytest.raises(ValidationError): + Parameters(seed=-1) + + # Test temperature + Parameters(temperature=1) + with pytest.raises(ValidationError): + Parameters(temperature=0) + with pytest.raises(ValidationError): + Parameters(temperature=-1) + + # Test top_k + Parameters(top_k=1) + with pytest.raises(ValidationError): + Parameters(top_k=0) + with pytest.raises(ValidationError): + Parameters(top_k=-1) + + # Test top_p + Parameters(top_p=0.5) + with pytest.raises(ValidationError): + Parameters(top_p=0) + with pytest.raises(ValidationError): + Parameters(top_p=-1) + with pytest.raises(ValidationError): + Parameters(top_p=1) + + # Test truncate + Parameters(truncate=1) + with pytest.raises(ValidationError): + Parameters(truncate=0) + with pytest.raises(ValidationError): + Parameters(truncate=-1) + + # Test typical_p + Parameters(typical_p=0.5) + with pytest.raises(ValidationError): + Parameters(typical_p=0) + with pytest.raises(ValidationError): + Parameters(typical_p=-1) + with pytest.raises(ValidationError): + Parameters(typical_p=1) + + +def test_request_validation(): + Request(inputs="test") + + with pytest.raises(ValidationError): + Request(inputs="") + + Request(inputs="test", stream=True) + Request(inputs="test", parameters=Parameters(best_of=2, do_sample=True)) + + with pytest.raises(ValidationError): + Request( + inputs="test", parameters=Parameters(best_of=2, do_sample=True), stream=True + ) diff --git a/clients/python/text_generation/__init__.py b/clients/python/text_generation/__init__.py new file mode 100644 index 00000000..46109833 --- /dev/null +++ b/clients/python/text_generation/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +__version__ = "0.3.0" + +from text_generation.client import Client, AsyncClient +from text_generation.inference_api import InferenceAPIClient, InferenceAPIAsyncClient diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py new file mode 100644 index 00000000..03bc3888 --- /dev/null +++ b/clients/python/text_generation/client.py @@ -0,0 +1,487 @@ +import json +import requests + +from aiohttp import ClientSession, ClientTimeout +from pydantic import ValidationError +from typing import Dict, Optional, List, AsyncIterator, Iterator + +from text_generation.types import ( + StreamResponse, + Response, + Request, + Parameters, +) +from text_generation.errors import parse_error + + +class Client: + """Client to make calls to a text-generation-inference instance + + Example: + + ```python + >>> from text_generation import Client + + >>> client = Client("https://api-inference.huggingface.co/models/bigscience/bloomz") + >>> client.generate("Why is the sky blue?").generated_text + ' Rayleigh scattering' + + >>> result = "" + >>> for response in client.generate_stream("Why is the sky blue?"): + >>> if not response.token.special: + >>> result += response.token.text + >>> result + ' Rayleigh scattering' + ``` + """ + + def __init__( + self, + base_url: str, + headers: Optional[Dict[str, str]] = None, + cookies: Optional[Dict[str, str]] = None, + timeout: int = 10, + ): + """ + Args: + base_url (`str`): + text-generation-inference instance base url + headers (`Optional[Dict[str, str]]`): + Additional headers + cookies (`Optional[Dict[str, str]]`): + Cookies to include in the requests + timeout (`int`): + Timeout in seconds + """ + self.base_url = base_url + self.headers = headers + self.cookies = cookies + self.timeout = timeout + + def generate( + self, + prompt: str, + do_sample: bool = False, + max_new_tokens: int = 20, + best_of: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: bool = False, + seed: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: bool = False, + ) -> Response: + """ + Given a prompt, generate the following text + + Args: + prompt (`str`): + Input text + do_sample (`bool`): + Activate logits sampling + max_new_tokens (`int`): + Maximum number of generated tokens + best_of (`int`): + Generate best_of sequences and return the one with the highest token logprobs + repetition_penalty (`float`): + The parameter for repetition penalty. 1.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + return_full_text (`bool`): + Whether to prepend the prompt to the generated text + seed (`int`): + Random sampling seed + stop_sequences (`List[str]`): + Stop generating tokens if a member of `stop_sequences` is generated + temperature (`float`): + The value used to modulate the logits distribution. + top_k (`int`): + The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation.
+ truncate (`int`): + Truncate input tokens to the given size + typical_p (`float`): + Typical Decoding mass + See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information + watermark (`bool`): + Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + + Returns: + Response: generated response + """ + # Validate parameters + parameters = Parameters( + best_of=best_of, + details=True, + do_sample=do_sample, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + return_full_text=return_full_text, + seed=seed, + stop=stop_sequences if stop_sequences is not None else [], + temperature=temperature, + top_k=top_k, + top_p=top_p, + truncate=truncate, + typical_p=typical_p, + watermark=watermark, + ) + request = Request(inputs=prompt, stream=False, parameters=parameters) + + resp = requests.post( + self.base_url, + json=request.dict(), + headers=self.headers, + cookies=self.cookies, + timeout=self.timeout, + ) + payload = resp.json() + if resp.status_code != 200: + raise parse_error(resp.status_code, payload) + return Response(**payload[0]) + + def generate_stream( + self, + prompt: str, + do_sample: bool = False, + max_new_tokens: int = 20, + best_of: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: bool = False, + seed: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: bool = False, + ) -> Iterator[StreamResponse]: + """ + Given a prompt, generate the following stream of tokens + + Args: + prompt (`str`): + Input text + do_sample (`bool`): + Activate logits sampling + max_new_tokens (`int`): + Maximum number of generated tokens + best_of (`int`): + Generate best_of sequences and return the one with the highest token logprobs + repetition_penalty (`float`): + The parameter for repetition penalty. 1.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + return_full_text (`bool`): + Whether to prepend the prompt to the generated text + seed (`int`): + Random sampling seed + stop_sequences (`List[str]`): + Stop generating tokens if a member of `stop_sequences` is generated + temperature (`float`): + The value used to modulate the logits distribution. + top_k (`int`): + The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation.
+ truncate (`int`): + Truncate input tokens to the given size + typical_p (`float`): + Typical Decoding mass + See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information + watermark (`bool`): + Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + + Returns: + Iterator[StreamResponse]: stream of generated tokens + """ + # Validate parameters + parameters = Parameters( + best_of=best_of, + details=True, + do_sample=do_sample, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + return_full_text=return_full_text, + seed=seed, + stop=stop_sequences if stop_sequences is not None else [], + temperature=temperature, + top_k=top_k, + top_p=top_p, + truncate=truncate, + typical_p=typical_p, + watermark=watermark, + ) + request = Request(inputs=prompt, stream=True, parameters=parameters) + + resp = requests.post( + self.base_url, + json=request.dict(), + headers=self.headers, + cookies=self.cookies, + timeout=self.timeout, + stream=True, + ) + + if resp.status_code != 200: + raise parse_error(resp.status_code, resp.json()) + + # Parse ServerSentEvents + for byte_payload in resp.iter_lines(): + # Skip line + if byte_payload == b"\n": + continue + + payload = byte_payload.decode("utf-8") + + # Event data + if payload.startswith("data:"): + # Decode payload + json_payload = json.loads(payload.lstrip("data:").rstrip("\n")) + # Parse payload + try: + response = StreamResponse(**json_payload) + except ValidationError: + # If we failed to parse the payload, then it is an error payload + raise parse_error(resp.status_code, json_payload) + yield response + + +class AsyncClient: + """Asynchronous Client to make calls to a text-generation-inference instance + + Example: + + ```python + >>> from text_generation import AsyncClient + + >>> client = AsyncClient("https://api-inference.huggingface.co/models/bigscience/bloomz") + >>> response = await client.generate("Why is the sky blue?") + >>> response.generated_text + ' Rayleigh scattering' + + >>> result = "" + >>> async for response in client.generate_stream("Why is the sky blue?"): + >>> if not response.token.special: + >>> result += response.token.text + >>> result + ' Rayleigh scattering' + ``` + """ + + def __init__( + self, + base_url: str, + headers: Optional[Dict[str, str]] = None, + cookies: Optional[Dict[str, str]] = None, + timeout: int = 10, + ): + """ + Args: + base_url (`str`): + text-generation-inference instance base url + headers (`Optional[Dict[str, str]]`): + Additional headers + cookies (`Optional[Dict[str, str]]`): + Cookies to include in the requests + timeout (`int`): + Timeout in seconds + """ + self.base_url = base_url + self.headers = headers + self.cookies = cookies + self.timeout = ClientTimeout(timeout) + + async def generate( + self, + prompt: str, + do_sample: bool = False, + max_new_tokens: int = 20, + best_of: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: bool = False, + seed: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: bool = False, + ) -> Response: + """ + Given a prompt, generate the following text asynchronously + + Args: + prompt (`str`): + Input text + do_sample (`bool`): + Activate logits sampling + max_new_tokens (`int`): + Maximum number of generated
tokens + best_of (`int`): + Generate best_of sequences and return the one with the highest token logprobs + repetition_penalty (`float`): + The parameter for repetition penalty. 1.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + return_full_text (`bool`): + Whether to prepend the prompt to the generated text + seed (`int`): + Random sampling seed + stop_sequences (`List[str]`): + Stop generating tokens if a member of `stop_sequences` is generated + temperature (`float`): + The value used to modulate the logits distribution. + top_k (`int`): + The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation. + truncate (`int`): + Truncate input tokens to the given size + typical_p (`float`): + Typical Decoding mass + See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information + watermark (`bool`): + Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + + Returns: + Response: generated response + """ + # Validate parameters + parameters = Parameters( + best_of=best_of, + details=True, + do_sample=do_sample, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + return_full_text=return_full_text, + seed=seed, + stop=stop_sequences if stop_sequences is not None else [], + temperature=temperature, + top_k=top_k, + top_p=top_p, + truncate=truncate, + typical_p=typical_p, + watermark=watermark, + ) + request = Request(inputs=prompt, stream=False, parameters=parameters) + + async with ClientSession( + headers=self.headers, cookies=self.cookies, timeout=self.timeout + ) as session: + async with session.post(self.base_url, json=request.dict()) as resp: + payload = await resp.json() + + if resp.status != 200: + raise parse_error(resp.status, payload) + return Response(**payload[0]) + + async def generate_stream( + self, + prompt: str, + do_sample: bool = False, + max_new_tokens: int = 20, + best_of: Optional[int] = None, + repetition_penalty: Optional[float] = None, + return_full_text: bool = False, + seed: Optional[int] = None, + stop_sequences: Optional[List[str]] = None, + temperature: Optional[float] = None, + top_k: Optional[int] = None, + top_p: Optional[float] = None, + truncate: Optional[int] = None, + typical_p: Optional[float] = None, + watermark: bool = False, + ) -> AsyncIterator[StreamResponse]: + """ + Given a prompt, generate the following stream of tokens asynchronously + + Args: + prompt (`str`): + Input text + do_sample (`bool`): + Activate logits sampling + max_new_tokens (`int`): + Maximum number of generated tokens + best_of (`int`): + Generate best_of sequences and return the one with the highest token logprobs + repetition_penalty (`float`): + The parameter for repetition penalty. 1.0 means no penalty. See [this + paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + return_full_text (`bool`): + Whether to prepend the prompt to the generated text + seed (`int`): + Random sampling seed + stop_sequences (`List[str]`): + Stop generating tokens if a member of `stop_sequences` is generated + temperature (`float`): + The value used to modulate the logits distribution. + top_k (`int`): + The number of highest probability vocabulary tokens to keep for top-k-filtering.
+ top_p (`float`): + If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + higher are kept for generation. + truncate (`int`): + Truncate input tokens to the given size + typical_p (`float`): + Typical Decoding mass + See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information + watermark (`bool`): + Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + + Returns: + AsyncIterator[StreamResponse]: stream of generated tokens + """ + # Validate parameters + parameters = Parameters( + best_of=best_of, + details=True, + do_sample=do_sample, + max_new_tokens=max_new_tokens, + repetition_penalty=repetition_penalty, + return_full_text=return_full_text, + seed=seed, + stop=stop_sequences if stop_sequences is not None else [], + temperature=temperature, + top_k=top_k, + top_p=top_p, + truncate=truncate, + typical_p=typical_p, + watermark=watermark, + ) + request = Request(inputs=prompt, stream=True, parameters=parameters) + + async with ClientSession( + headers=self.headers, cookies=self.cookies, timeout=self.timeout + ) as session: + async with session.post(self.base_url, json=request.dict()) as resp: + + if resp.status != 200: + raise parse_error(resp.status, await resp.json()) + + # Parse ServerSentEvents + async for byte_payload in resp.content: + # Skip line + if byte_payload == b"\n": + continue + + payload = byte_payload.decode("utf-8") + + # Event data + if payload.startswith("data:"): + # Decode payload + json_payload = json.loads(payload.lstrip("data:").rstrip("\n")) + # Parse payload + try: + response = StreamResponse(**json_payload) + except ValidationError: + # If we failed to parse the payload, then it is an error payload + raise parse_error(resp.status, json_payload) + yield response diff --git a/clients/python/text_generation/errors.py b/clients/python/text_generation/errors.py new file mode 100644 index 00000000..dbf0b761 --- /dev/null +++ b/clients/python/text_generation/errors.py @@ -0,0 +1,106 @@ +from typing import Dict + + +# Text Generation Inference Errors +class ValidationError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class GenerationError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class OverloadedError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class IncompleteGenerationError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +# API Inference Errors +class BadRequestError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class ShardNotReadyError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class ShardTimeoutError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class NotFoundError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class RateLimitExceededError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +class NotSupportedError(Exception): + def __init__(self, model_id: str): + message = ( + f"Model `{model_id}` is not available for inference with this client. \n" + "Use `huggingface_hub.inference_api.InferenceApi` instead."
+ ) + super(NotSupportedError, self).__init__(message) + + +# Unknown error +class UnknownError(Exception): + def __init__(self, message: str): + super().__init__(message) + + +def parse_error(status_code: int, payload: Dict[str, str]) -> Exception: + """ + Parse error given an HTTP status code and a JSON payload + + Args: + status_code (`int`): + HTTP status code + payload (`Dict[str, str]`): + JSON payload + + Returns: + Exception: parsed exception + + """ + # Try to parse a Text Generation Inference error + message = payload["error"] + if "error_type" in payload: + error_type = payload["error_type"] + if error_type == "generation": + return GenerationError(message) + if error_type == "incomplete_generation": + return IncompleteGenerationError(message) + if error_type == "overloaded": + return OverloadedError(message) + if error_type == "validation": + return ValidationError(message) + + # Try to parse an API Inference error + if status_code == 400: + return BadRequestError(message) + if status_code == 403 or status_code == 424: + return ShardNotReadyError(message) + if status_code == 504: + return ShardTimeoutError(message) + if status_code == 404: + return NotFoundError(message) + if status_code == 429: + return RateLimitExceededError(message) + + # Fallback to an unknown error + return UnknownError(message) diff --git a/clients/python/text_generation/inference_api.py b/clients/python/text_generation/inference_api.py new file mode 100644 index 00000000..eb70b3d1 --- /dev/null +++ b/clients/python/text_generation/inference_api.py @@ -0,0 +1,154 @@ +import os +import requests +import base64 +import json +import warnings + +from typing import List, Optional +from huggingface_hub.utils import build_hf_headers + +from text_generation import Client, AsyncClient, __version__ +from text_generation.errors import NotSupportedError + +INFERENCE_ENDPOINT = os.environ.get( + "HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co" +) + +SUPPORTED_MODELS = None + + +def get_supported_models() -> Optional[List[str]]: + """ + Get the list of supported text-generation models from GitHub + + Returns: + Optional[List[str]]: supported models list or None if unable to get the list from GitHub + """ + global SUPPORTED_MODELS + if SUPPORTED_MODELS is not None: + return SUPPORTED_MODELS + + response = requests.get( + "https://api.github.com/repos/huggingface/text-generation-inference/contents/supported_models.json", + timeout=5, + ) + if response.status_code == 200: + file_content = response.json()["content"] + SUPPORTED_MODELS = json.loads(base64.b64decode(file_content).decode("utf-8")) + return SUPPORTED_MODELS + + warnings.warn("Could not retrieve list of supported models.") + return None + + +class InferenceAPIClient(Client): + """Client to make calls to the HuggingFace Inference API.
+ + Only supports a subset of the available text-generation or text2text-generation models that are served using + text-generation-inference + + Example: + + ```python + >>> from text_generation import InferenceAPIClient + + >>> client = InferenceAPIClient("bigscience/bloomz") + >>> client.generate("Why is the sky blue?").generated_text + ' Rayleigh scattering' + + >>> result = "" + >>> for response in client.generate_stream("Why is the sky blue?"): + >>> if not response.token.special: + >>> result += response.token.text + >>> result + ' Rayleigh scattering' + ``` + """ + + def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10): + """ + Init headers and API information + + Args: + repo_id (`str`): + Id of repository (e.g. `bigscience/bloom`). + token (`str`, `optional`): + The API token to use as HTTP bearer authorization. This is not + the authentication token. You can find the token in + https://huggingface.co/settings/token. Alternatively, you can + find both your organizations and personal API tokens using + `HfApi().whoami(token)`. + timeout (`int`): + Timeout in seconds + """ + + # Text Generation Inference client only supports a subset of the available hub models + supported_models = get_supported_models() + if supported_models is not None and repo_id not in supported_models: + raise NotSupportedError(repo_id) + + headers = build_hf_headers( + token=token, library_name="text-generation", library_version=__version__ + ) + base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}" + + super(InferenceAPIClient, self).__init__( + base_url, headers=headers, timeout=timeout + ) + + +class InferenceAPIAsyncClient(AsyncClient): + """Asynchronous Client to make calls to the HuggingFace Inference API. + + Only supports a subset of the available text-generation or text2text-generation models that are served using + text-generation-inference + + Example: + + ```python + >>> from text_generation import InferenceAPIAsyncClient + + >>> client = InferenceAPIAsyncClient("bigscience/bloomz") + >>> response = await client.generate("Why is the sky blue?") + >>> response.generated_text + ' Rayleigh scattering' + + >>> result = "" + >>> async for response in client.generate_stream("Why is the sky blue?"): + >>> if not response.token.special: + >>> result += response.token.text + >>> result + ' Rayleigh scattering' + ``` + """ + + def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10): + """ + Init headers and API information + + Args: + repo_id (`str`): + Id of repository (e.g. `bigscience/bloom`). + token (`str`, `optional`): + The API token to use as HTTP bearer authorization. This is not + the authentication token. You can find the token in + https://huggingface.co/settings/token. Alternatively, you can + find both your organizations and personal API tokens using + `HfApi().whoami(token)`.
+ timeout (`int`): + Timeout in seconds + """ + + # Text Generation Inference client only supports a subset of the available hub models + supported_models = get_supported_models() + if supported_models is not None and repo_id not in supported_models: + raise NotSupportedError(repo_id) + + headers = build_hf_headers( + token=token, library_name="text-generation", library_version=__version__ + ) + base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}" + + super(InferenceAPIAsyncClient, self).__init__( + base_url, headers=headers, timeout=timeout + ) diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py new file mode 100644 index 00000000..ea2070b8 --- /dev/null +++ b/clients/python/text_generation/types.py @@ -0,0 +1,223 @@ +from enum import Enum +from pydantic import BaseModel, validator +from typing import Optional, List + +from text_generation.errors import ValidationError + + +class Parameters(BaseModel): + # Activate logits sampling + do_sample: bool = False + # Maximum number of generated tokens + max_new_tokens: int = 20 + # The parameter for repetition penalty. 1.0 means no penalty. + # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details. + repetition_penalty: Optional[float] = None + # Whether to prepend the prompt to the generated text + return_full_text: bool = False + # Stop generating tokens if a member of `stop_sequences` is generated + stop: List[str] = [] + # Random sampling seed + seed: Optional[int] + # The value used to modulate the logits distribution. + temperature: Optional[float] + # The number of highest probability vocabulary tokens to keep for top-k-filtering. + top_k: Optional[int] + # If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or + # higher are kept for generation.
+ top_p: Optional[float] + # Truncate input tokens to the given size + truncate: Optional[int] + # Typical Decoding mass + # See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information + typical_p: Optional[float] + # Generate best_of sequences and return the one with the highest token logprobs + best_of: Optional[int] + # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226) + watermark: bool = False + # Get generation details + details: bool = False + + @validator("best_of") + def valid_best_of(cls, field_value, values): + if field_value is not None: + if field_value <= 0: + raise ValidationError("`best_of` must be strictly positive") + sampling = ( + values["do_sample"] + | (values["temperature"] is not None) + | (values["top_k"] is not None) + | (values["top_p"] is not None) + | (values["typical_p"] is not None) + ) + if field_value > 1 and not sampling: + raise ValidationError("you must use sampling when `best_of` is > 1") + + return field_value + + @validator("repetition_penalty") + def valid_repetition_penalty(cls, v): + if v is not None and v <= 0: + raise ValidationError("`repetition_penalty` must be strictly positive") + return v + + @validator("seed") + def valid_seed(cls, v): + if v is not None and v < 0: + raise ValidationError("`seed` must be positive") + return v + + @validator("temperature") + def valid_temp(cls, v): + if v is not None and v <= 0: + raise ValidationError("`temperature` must be strictly positive") + return v + + @validator("top_k") + def valid_top_k(cls, v): + if v is not None and v <= 0: + raise ValidationError("`top_k` must be strictly positive") + return v + + @validator("top_p") + def valid_top_p(cls, v): + if v is not None and (v <= 0 or v >= 1.0): + raise ValidationError("`top_p` must be > 0.0 and < 1.0") + return v + + @validator("truncate") + def valid_truncate(cls, v): + if v is not None and v <= 0: + raise ValidationError("`truncate` must be strictly positive") + return v + + @validator("typical_p") + def valid_typical_p(cls, v): + if v is not None and (v <= 0 or v >= 1.0): + raise ValidationError("`typical_p` must be > 0.0 and < 1.0") + return v + + +class Request(BaseModel): + # Prompt + inputs: str + # Generation parameters + parameters: Optional[Parameters] + # Whether to stream output tokens + stream: bool = False + + @validator("inputs") + def valid_input(cls, v): + if not v: + raise ValidationError("`inputs` cannot be empty") + return v + + @validator("stream") + def valid_best_of_stream(cls, field_value, values): + parameters = values["parameters"] + if ( + parameters is not None + and parameters.best_of is not None + and parameters.best_of > 1 + and field_value + ): + raise ValidationError( + "`best_of` != 1 is not supported when `stream` == True" + ) + return field_value + + +# Prompt tokens +class PrefillToken(BaseModel): + # Token ID from the model tokenizer + id: int + # Token text + text: str + # Logprob + # Optional since the logprob of the first token cannot be computed + logprob: Optional[float] + + +# Generated tokens +class Token(BaseModel): + # Token ID from the model tokenizer + id: int + # Token text + text: str + # Logprob + logprob: float + # Is the token a special token + # Can be used to ignore tokens when concatenating + special: bool + + +# Generation finish reason +class FinishReason(Enum): + # number of generated tokens == `max_new_tokens` + Length = "length" + # the model generated its end of sequence token + EndOfSequenceToken =
"eos_token" + # the model generated a text included in `stop_sequences` + StopSequence = "stop_sequence" + + +# Additional sequences when using the `best_of` parameter +class BestOfSequence(BaseModel): + # Generated text + generated_text: str + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + # Prompt tokens + prefill: List[PrefillToken] + # Generated tokens + tokens: List[Token] + + +# `generate` details +class Details(BaseModel): + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + # Prompt tokens + prefill: List[PrefillToken] + # Generated tokens + tokens: List[Token] + # Additional sequences when using the `best_of` parameter + best_of_sequences: Optional[List[BestOfSequence]] + + +# `generate` return value +class Response(BaseModel): + # Generated text + generated_text: str + # Generation details + details: Details + + +# `generate_stream` details +class StreamDetails(BaseModel): + # Generation finish reason + finish_reason: FinishReason + # Number of generated tokens + generated_tokens: int + # Sampling seed if sampling was activated + seed: Optional[int] + + +# `generate_stream` return value +class StreamResponse(BaseModel): + # Generated token + token: Token + # Complete generated text + # Only available when the generation is finished + generated_text: Optional[str] + # Generation details + # Only available when the generation is finished + details: Optional[StreamDetails] diff --git a/docs/openapi.json b/docs/openapi.json index b4ef3ba6..a2230a07 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -11,7 +11,7 @@ "name": "Apache 2.0", "url": "https://www.apache.org/licenses/LICENSE-2.0" }, - "version": "0.2.1" + "version": "0.4.0" }, "paths": { "/generate": { @@ -38,10 +38,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/GenerateResponse" - } + "$ref": "#/components/schemas/GenerateResponse" } } } @@ -51,10 +48,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Input validation error" @@ -67,10 +61,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Request failed during generation" @@ -83,10 +74,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Model is overloaded" @@ -99,10 +87,7 @@ "content": { "application/json": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Incomplete generation" @@ -136,12 +121,9 @@ "200": { "description": "Generated Text", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/StreamResponse" - } + "$ref": "#/components/schemas/StreamResponse" } } } @@ -149,12 +131,9 @@ "422": { "description": "Input validation error", "content": { - "text/event-stream ": { + 
"text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Input validation error" @@ -165,12 +144,9 @@ "424": { "description": "Generation Error", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Request failed during generation" @@ -181,12 +157,9 @@ "429": { "description": "Model is overloaded", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Model is overloaded" @@ -197,12 +170,9 @@ "500": { "description": "Incomplete generation", "content": { - "text/event-stream ": { + "text/event-stream": { "schema": { - "type": "array", - "items": { - "$ref": "#/components/schemas/ErrorResponse" - } + "$ref": "#/components/schemas/ErrorResponse" }, "example": { "error": "Incomplete generation" @@ -213,17 +183,90 @@ }, "deprecated": false } + }, + "/metrics": { + "get": { + "tags": [ + "Text Generation Inference" + ], + "summary": "Prometheus metrics scrape endpoint", + "description": "Prometheus metrics scrape endpoint", + "operationId": "metrics", + "responses": { + "200": { + "description": "Prometheus Metrics", + "content": { + "text/plain": { + "schema": { + "type": "string" + } + } + } + } + }, + "deprecated": false + } } }, "components": { "schemas": { + "BestOfSequence": { + "type": "object", + "required": [ + "generated_text", + "finish_reason", + "generated_tokens", + "prefill", + "tokens" + ], + "properties": { + "finish_reason": { + "$ref": "#/components/schemas/FinishReason" + }, + "generated_text": { + "type": "string", + "example": "test" + }, + "generated_tokens": { + "type": "integer", + "format": "int32", + "example": 1 + }, + "prefill": { + "type": "array", + "items": { + "$ref": "#/components/schemas/PrefillToken" + } + }, + "seed": { + "type": "integer", + "format": "int64", + "example": 42, + "nullable": true + }, + "tokens": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Token" + } + } + } + }, "Details": { "type": "object", "required": [ "finish_reason", - "generated_tokens" + "generated_tokens", + "prefill", + "tokens" ], "properties": { + "best_of_sequences": { + "type": "array", + "items": { + "$ref": "#/components/schemas/BestOfSequence" + } + }, "finish_reason": { "$ref": "#/components/schemas/FinishReason" }, @@ -235,13 +278,14 @@ "prefill": { "type": "array", "items": { - "$ref": "#/components/schemas/Token" + "$ref": "#/components/schemas/PrefillToken" } }, "seed": { "type": "integer", "format": "int64", - "example": 42 + "example": 42, + "nullable": true }, "tokens": { "type": "array", @@ -254,11 +298,15 @@ "ErrorResponse": { "type": "object", "required": [ - "error" + "error", + "error_type" ], "properties": { "error": { "type": "string" + }, + "error_type": { + "type": "string" } } }, @@ -273,6 +321,13 @@ "GenerateParameters": { "type": "object", "properties": { + "best_of": { + "type": "integer", + "default": "null", + "example": 1, + "nullable": true, + "exclusiveMinimum": 0.0 + }, "details": { "type": "boolean", "default": "true" @@ -297,9 +352,19 @@ "nullable": true, "exclusiveMinimum": 0.0 }, + "return_full_text": { + "type": "boolean", + "default": 
"null", + "example": false, + "nullable": true + }, "seed": { "type": "integer", - "format": "int64" + "format": "int64", + "default": "null", + "example": "null", + "nullable": true, + "exclusiveMinimum": 0.0 }, "stop": { "type": "array", @@ -335,6 +400,26 @@ "nullable": true, "maximum": 1.0, "exclusiveMinimum": 0.0 + }, + "truncate": { + "type": "integer", + "default": "null", + "example": "null", + "nullable": true + }, + "typical_p": { + "type": "number", + "format": "float", + "default": "null", + "example": 0.95, + "nullable": true, + "maximum": 1.0, + "exclusiveMinimum": 0.0 + }, + "watermark": { + "type": "boolean", + "default": "false", + "example": true } } }, @@ -368,6 +453,31 @@ } } }, + "PrefillToken": { + "type": "object", + "required": [ + "id", + "text", + "logprob" + ], + "properties": { + "id": { + "type": "integer", + "format": "int32", + "example": 0 + }, + "logprob": { + "type": "number", + "format": "float", + "example": -0.34, + "nullable": true + }, + "text": { + "type": "string", + "example": "test" + } + } + }, "StreamDetails": { "type": "object", "required": [ @@ -386,7 +496,8 @@ "seed": { "type": "integer", "format": "int64", - "example": 42 + "example": 42, + "nullable": true } } }, @@ -415,7 +526,8 @@ "required": [ "id", "text", - "logprob" + "logprob", + "special" ], "properties": { "id": { @@ -429,6 +541,10 @@ "example": -0.34, "nullable": true }, + "special": { + "type": "boolean", + "example": "false" + }, "text": { "type": "string", "example": "test" diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml index a0d2b089..49948b05 100644 --- a/launcher/Cargo.toml +++ b/launcher/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-launcher" -version = "0.2.1" +version = "0.4.0" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Launcher" diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 3c8d9fcc..80466fe6 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -1,6 +1,7 @@ use clap::Parser; use serde_json::Value; use std::env; +use std::ffi::OsString; use std::io::{BufRead, BufReader, Read}; use std::path::Path; use std::process::ExitCode; @@ -12,7 +13,7 @@ use std::thread; use std::thread::sleep; use std::time::{Duration, Instant}; use std::{fs, io}; -use subprocess::{Popen, PopenConfig, PopenError, Redirection}; +use subprocess::{ExitStatus, Popen, PopenConfig, PopenError, Redirection}; /// App Configuration #[derive(Parser, Debug)] @@ -23,13 +24,21 @@ struct Args { #[clap(long, env)] revision: Option, #[clap(long, env)] + sharded: Option, + #[clap(long, env)] num_shard: Option, #[clap(long, env)] quantize: bool, #[clap(default_value = "128", long, env)] max_concurrent_requests: usize, + #[clap(default_value = "2", long, env)] + max_best_of: usize, + #[clap(default_value = "4", long, env)] + max_stop_sequences: usize, #[clap(default_value = "1000", long, env)] max_input_length: usize, + #[clap(default_value = "1512", long, env)] + max_total_tokens: usize, #[clap(default_value = "32", long, env)] max_batch_size: usize, #[clap(default_value = "20", long, env)] @@ -43,38 +52,112 @@ struct Args { #[clap(default_value = "29500", long, env)] master_port: usize, #[clap(long, env)] + huggingface_hub_cache: Option, + #[clap(long, env)] + weights_cache_override: Option, + #[clap(long, env)] + disable_custom_kernels: bool, + #[clap(long, env)] json_output: bool, #[clap(long, env)] otlp_endpoint: Option, + #[clap(long, env)] + cors_allow_origin: Vec, + #[clap(long, env)] + watermark_gamma: Option, + #[clap(long, 
env)] + watermark_delta: Option, } fn main() -> ExitCode { // Pattern match configuration + let args = Args::parse(); + + if args.json_output { + tracing_subscriber::fmt().json().init(); + } else { + tracing_subscriber::fmt().compact().init(); + } + + tracing::info!("{:?}", args); + let Args { model_id, revision, + sharded, num_shard, quantize, max_concurrent_requests, + max_best_of, + max_stop_sequences, max_input_length, + max_total_tokens, max_batch_size, max_waiting_tokens, port, shard_uds_path, master_addr, master_port, + huggingface_hub_cache, + weights_cache_override, + disable_custom_kernels, json_output, otlp_endpoint, - } = Args::parse(); + cors_allow_origin, + watermark_gamma, + watermark_delta, + } = args; - if json_output { - tracing_subscriber::fmt().json().init(); + // get the number of shards given `sharded` and `num_shard` + let num_shard = if let Some(sharded) = sharded { + // sharded is set + match sharded { + // sharded is set and true + true => { + match num_shard { + None => { + // try to default to the number of available GPUs + tracing::info!("Parsing num_shard from CUDA_VISIBLE_DEVICES"); + let n_devices = num_cuda_devices() + .expect("--num-shard and CUDA_VISIBLE_DEVICES are not set"); + if n_devices <= 1 { + panic!("`sharded` is true but only found {n_devices} CUDA devices"); + } + n_devices + } + Some(num_shard) => { + // we can't have only one shard while sharded + if num_shard <= 1 { + panic!("`sharded` is true but `num_shard` <= 1"); + } + num_shard + } + } + } + // sharded is set and false + false => { + let num_shard = num_shard.unwrap_or(1); + // we can't have more than one shard while not sharded + if num_shard != 1 { + panic!("`sharded` is false but `num_shard` != 1"); + } + num_shard + } + } } else { - tracing_subscriber::fmt().compact().init(); + match num_shard { + // get num_shard from CUDA_VISIBLE_DEVICES or default to a single shard + None => num_cuda_devices().unwrap_or(1), + Some(num_shard) => num_shard, + } + }; + if num_shard < 1 { + panic!("`num_shard` cannot be < 1"); } - // By default we only have one master shard - let num_shard = num_shard.unwrap_or(1); + if num_shard > 1 { + tracing::info!("Sharding model on {num_shard} processes"); + } // Signal handler let running = Arc::new(AtomicBool::new(true)); @@ -84,6 +167,121 @@ fn main() -> ExitCode { }) .expect("Error setting Ctrl-C handler"); + // Check if model_id is a local model + let local_path = Path::new(&model_id); + let is_local_model = local_path.exists() && local_path.is_dir(); + + // Download weights for sharded models + if !is_local_model && weights_cache_override.is_none() && num_shard > 1 { + let mut download_argv = vec![ + "text-generation-server".to_string(), + "download-weights".to_string(), + model_id.clone(), + "--extension".to_string(), + ".safetensors".to_string(), + "--logger-level".to_string(), + "INFO".to_string(), + "--json-output".to_string(), + ]; + + // Model optional revision + if let Some(ref revision) = revision { + download_argv.push("--revision".to_string()); + download_argv.push(revision.to_string()) + } + + // Copy current process env + let mut env: Vec<(OsString, OsString)> = env::vars_os().collect(); + + // If huggingface_hub_cache is set, pass it to the shard + // Useful when running inside a docker container + if let Some(ref huggingface_hub_cache) = huggingface_hub_cache { + env.push(("HUGGINGFACE_HUB_CACHE".into(), huggingface_hub_cache.into())); + }; + + // Enable hf transfer for insane download speeds + env.push(("HF_HUB_ENABLE_HF_TRANSFER".into(), 
"1".into())); + + // Start process + tracing::info!("Starting download process."); + let mut download_process = match Popen::create( + &download_argv, + PopenConfig { + stdout: Redirection::Pipe, + stderr: Redirection::Pipe, + // Needed for the shutdown procedure + setpgid: true, + env: Some(env), + ..Default::default() + }, + ) { + Ok(p) => p, + Err(err) => { + if let PopenError::IoError(ref err) = err { + if err.kind() == io::ErrorKind::NotFound { + tracing::error!("text-generation-server not found in PATH"); + tracing::error!("Please install it with `make install-server`") + } + } + return ExitCode::FAILURE; + } + }; + + // Redirect STDOUT to the console + let download_stdout = download_process.stdout.take().unwrap(); + thread::spawn(move || { + // Enter download tracing span + let stdout = BufReader::new(download_stdout); + let _span = tracing::span!(tracing::Level::INFO, "download").entered(); + for line in stdout.lines() { + // Parse loguru logs + if let Ok(value) = serde_json::from_str::(&line.unwrap()) { + if let Some(text) = value.get("text") { + // Format escaped newlines + tracing::info!("{}", text.to_string().replace("\\n", "")); + } + } + } + }); + + loop { + if let Some(status) = download_process.poll() { + match status { + ExitStatus::Exited(exit_code) => { + if exit_code == 0 { + tracing::info!("Successfully downloaded weights."); + break; + } else { + let mut err = String::new(); + download_process + .stderr + .take() + .unwrap() + .read_to_string(&mut err) + .unwrap(); + tracing::error!("Download encountered an error: {err}"); + return ExitCode::FAILURE; + } + } + _ => { + tracing::error!("Download process exited with an unknown status."); + return ExitCode::FAILURE; + } + } + } + if !running.load(Ordering::SeqCst) { + download_process.terminate().unwrap(); + tracing::info!("Waiting for download process to gracefully shutdown"); + download_process + .wait_timeout(Duration::from_secs(90)) + .unwrap(); + tracing::info!("Download process terminated"); + return ExitCode::SUCCESS; + } + sleep(Duration::from_millis(100)); + } + } + // Shared shutdown bool let shutdown = Arc::new(Mutex::new(false)); // Shared shutdown channel @@ -99,6 +297,8 @@ fn main() -> ExitCode { let revision = revision.clone(); let uds_path = shard_uds_path.clone(); let master_addr = master_addr.clone(); + let huggingface_hub_cache = huggingface_hub_cache.clone(); + let weights_cache_override = weights_cache_override.clone(); let status_sender = status_sender.clone(); let shutdown = shutdown.clone(); let shutdown_sender = shutdown_sender.clone(); @@ -113,6 +313,11 @@ fn main() -> ExitCode { num_shard, master_addr, master_port, + huggingface_hub_cache, + weights_cache_override, + disable_custom_kernels, + watermark_gamma, + watermark_delta, otlp_endpoint, status_sender, shutdown, @@ -161,8 +366,14 @@ fn main() -> ExitCode { "text-generation-router".to_string(), "--max-concurrent-requests".to_string(), max_concurrent_requests.to_string(), + "--max-best-of".to_string(), + max_best_of.to_string(), + "--max-stop-sequences".to_string(), + max_stop_sequences.to_string(), "--max-input-length".to_string(), max_input_length.to_string(), + "--max-total-tokens".to_string(), + max_total_tokens.to_string(), "--max-batch-size".to_string(), max_batch_size.to_string(), "--max-waiting-tokens".to_string(), @@ -185,6 +396,12 @@ fn main() -> ExitCode { argv.push(otlp_endpoint); } + // CORS origins + for origin in cors_allow_origin.into_iter() { + argv.push("--cors-allow-origin".to_string()); + argv.push(origin); + } + let mut 
webserver = match Popen::create( &argv, PopenConfig { @@ -232,7 +449,7 @@ fn main() -> ExitCode { while running.load(Ordering::SeqCst) { if let Ok(ShardStatus::Failed((rank, err))) = status_receiver.try_recv() { - tracing::error!("Shard {} failed:\n{}", rank, err); + tracing::error!("Shard {rank} failed:\n{err}"); exit_code = ExitCode::FAILURE; break; }; @@ -275,6 +492,11 @@ fn shard_manager( world_size: usize, master_addr: String, master_port: usize, + huggingface_hub_cache: Option, + weights_cache_override: Option, + disable_custom_kernels: bool, + watermark_gamma: Option, + watermark_delta: Option, otlp_endpoint: Option, status_sender: mpsc::Sender, shutdown: Arc>, @@ -319,43 +541,54 @@ fn shard_manager( shard_argv.push(otlp_endpoint); } - let mut env = vec![ - ("RANK".into(), rank.to_string().into()), - ("WORLD_SIZE".into(), world_size.to_string().into()), - ("MASTER_ADDR".into(), master_addr.into()), - ("MASTER_PORT".into(), master_port.to_string().into()), - ("SAFETENSORS_FAST_GPU".into(), "1".into()), - ("NCCL_ASYNC_ERROR_HANDLING".into(), "1".into()), - ]; + // Copy current process env + let mut env: Vec<(OsString, OsString)> = env::vars_os().collect(); - // If the HUGGINGFACE_HUB_CACHE env var is set, pass it to the shard + // Torch Distributed Env vars + env.push(("RANK".into(), rank.to_string().into())); + env.push(("WORLD_SIZE".into(), world_size.to_string().into())); + env.push(("MASTER_ADDR".into(), master_addr.into())); + env.push(("MASTER_PORT".into(), master_port.to_string().into())); + env.push(("NCCL_ASYNC_ERROR_HANDLING".into(), "1".into())); + + // Safetensors load fast + env.push(("SAFETENSORS_FAST_GPU".into(), "1".into())); + + // Enable hf transfer for insane download speeds + env.push(("HF_HUB_ENABLE_HF_TRANSFER".into(), "1".into())); + + // If huggingface_hub_cache is some, pass it to the shard // Useful when running inside a docker container - if let Ok(huggingface_hub_cache) = env::var("HUGGINGFACE_HUB_CACHE") { + if let Some(huggingface_hub_cache) = huggingface_hub_cache { env.push(("HUGGINGFACE_HUB_CACHE".into(), huggingface_hub_cache.into())); }; - // If the WEIGHTS_CACHE_OVERRIDE env var is set, pass it to the shard + // If weights_cache_override is some, pass it to the shard // Useful when running inside a HuggingFace Inference Endpoint - if let Ok(weights_cache_override) = env::var("WEIGHTS_CACHE_OVERRIDE") { + if let Some(weights_cache_override) = weights_cache_override { env.push(( "WEIGHTS_CACHE_OVERRIDE".into(), weights_cache_override.into(), )); }; - // If the NCCL_SHM_DISABLE env var is set, pass it to the shard - // needed when running NCCL inside a docker container and when you can't increase shm size - if let Ok(nccl_shm_disalbe) = env::var("NCCL_SHM_DISABLE") { - env.push(("NCCL_SHM_DISABLE".into(), nccl_shm_disalbe.into())); - }; + // If disable_custom_kernels is true, pass it to the shard as an env var + if disable_custom_kernels { + env.push(("DISABLE_CUSTOM_KERNELS".into(), "True".into())) + } - // If the CUDA_VISIBLE_DEVICES env var is set, pass it to the shard - if let Ok(cuda_visible_devices) = env::var("CUDA_VISIBLE_DEVICES") { - env.push(("CUDA_VISIBLE_DEVICES".into(), cuda_visible_devices.into())); - }; + // Watermark Gamma + if let Some(watermark_gamma) = watermark_gamma { + env.push(("WATERMARK_GAMMA".into(), watermark_gamma.to_string().into())) + } + + // Watermark Delta + if let Some(watermark_delta) = watermark_delta { + env.push(("WATERMARK_DELTA".into(), watermark_delta.to_string().into())) + } // Start process - 
tracing::info!("Starting shard {}", rank); + tracing::info!("Starting shard {rank}"); let mut p = match Popen::create( &shard_argv, PopenConfig { @@ -419,17 +652,17 @@ fn shard_manager( if *shutdown.lock().unwrap() { p.terminate().unwrap(); let _ = p.wait_timeout(Duration::from_secs(90)); - tracing::info!("Shard {} terminated", rank); + tracing::info!("Shard {rank} terminated"); return; } // Shard is ready if uds.exists() && !ready { - tracing::info!("Shard {} ready in {:?}", rank, start_time.elapsed()); + tracing::info!("Shard {rank} ready in {:?}", start_time.elapsed()); status_sender.send(ShardStatus::Ready).unwrap(); ready = true; } else if !ready && wait_time.elapsed() > Duration::from_secs(10) { - tracing::info!("Waiting for shard {} to be ready...", rank); + tracing::info!("Waiting for shard {rank} to be ready..."); wait_time = Instant::now(); } sleep(Duration::from_millis(100)); @@ -449,3 +682,11 @@ fn shutdown_shards(shutdown: Arc>, shutdown_receiver: &mpsc::Receive // This will block till all shutdown_sender are dropped let _ = shutdown_receiver.recv(); } + +fn num_cuda_devices() -> Option { + if let Ok(cuda_visible_devices) = env::var("CUDA_VISIBLE_DEVICES") { + let n_devices = cuda_visible_devices.split(',').count(); + return Some(n_devices); + } + None +} diff --git a/launcher/tests/bloom_560m.json b/launcher/tests/bloom_560m.json index 17e2571e..96f89f6b 100644 --- a/launcher/tests/bloom_560m.json +++ b/launcher/tests/bloom_560m.json @@ -1,122 +1,142 @@ { + "generated_text": ".get(\"action\");\n if (action == null) {\n throw new RuntimeException", "details": { "finish_reason": "length", "generated_tokens": 20, + "seed": null, "prefill": [ { "id": 10264, - "logprob": null, - "text": "Test" + "text": "Test", + "logprob": null }, { "id": 8821, - "logprob": -11.894989, - "text": " request" + "text": " request", + "logprob": -11.894989 } ], - "seed": null, "tokens": [ { "id": 17, + "text": ".", "logprob": -1.8267672, - "text": "." 
+ "special": false }, { "id": 1587, + "text": "get", "logprob": -2.4674969, - "text": "get" + "special": false }, { "id": 11, + "text": "(", "logprob": -1.906001, - "text": "(" + "special": false }, { "id": 5, + "text": "\"", "logprob": -1.2279545, - "text": "\"" + "special": false }, { "id": 4899, + "text": "action", "logprob": -4.170299, - "text": "action" + "special": false }, { "id": 5, + "text": "\"", "logprob": -0.32478866, - "text": "\"" + "special": false }, { "id": 12, + "text": ")", "logprob": -1.0773665, - "text": ")" + "special": false }, { "id": 30, + "text": ";", "logprob": -0.27640742, - "text": ";" + "special": false }, { "id": 837, + "text": "\n ", "logprob": -1.6970354, - "text": "\n " + "special": false }, { "id": 1320, + "text": " if", "logprob": -1.4495516, - "text": " if" + "special": false }, { "id": 375, + "text": " (", "logprob": -0.23609057, - "text": " (" + "special": false }, { "id": 4899, + "text": "action", "logprob": -1.1916996, - "text": "action" + "special": false }, { "id": 3535, + "text": " ==", "logprob": -0.8918753, - "text": " ==" + "special": false }, { "id": 5109, + "text": " null", "logprob": -0.3933342, - "text": " null" + "special": false }, { "id": 12, + "text": ")", "logprob": -0.43212673, - "text": ")" + "special": false }, { "id": 731, + "text": " {", "logprob": -0.17702064, - "text": " {" + "special": false }, { "id": 1260, + "text": "\n ", "logprob": -0.07027565, - "text": "\n " + "special": false }, { "id": 10519, + "text": " throw", "logprob": -1.3915029, - "text": " throw" + "special": false }, { "id": 2084, + "text": " new", "logprob": -0.04201372, - "text": " new" + "special": false }, { "id": 150858, + "text": " RuntimeException", "logprob": -1.7329919, - "text": " RuntimeException" + "special": false } ] - }, - "generated_text": ".get(\"action\");\n if (action == null) {\n throw new RuntimeException" + } } \ No newline at end of file diff --git a/launcher/tests/integration_tests.rs b/launcher/tests/integration_tests.rs index b70b1628..0d2b6c74 100644 --- a/launcher/tests/integration_tests.rs +++ b/launcher/tests/integration_tests.rs @@ -14,6 +14,7 @@ pub struct Token { id: u32, text: String, logprob: Option, + special: bool, } #[derive(Deserialize)] @@ -136,6 +137,7 @@ fn compare_results(result: GeneratedText, expected: GeneratedText) { { assert_eq!(token.id, expected_token.id); assert_eq!(token.text, expected_token.text); + assert_eq!(token.special, expected_token.special); if let Some(logprob) = token.logprob { let expected_logprob = expected_token.logprob.unwrap(); assert_float_eq!(logprob, expected_logprob, abs <= 0.001); diff --git a/launcher/tests/mt0_base.json b/launcher/tests/mt0_base.json index cee3bc47..22c9499f 100644 --- a/launcher/tests/mt0_base.json +++ b/launcher/tests/mt0_base.json @@ -1,117 +1,137 @@ { + "generated_text": "\"\"\"Test the contents of the contents of the contents. 
\"\"\" test_test", "details": { "finish_reason": "length", "generated_tokens": 20, + "seed": null, "prefill": [ { "id": 0, - "logprob": null, - "text": "" + "text": "", + "logprob": null } ], - "seed": null, "tokens": [ { "id": 259, + "text": " ", "logprob": -1.3656927, - "text": "" + "special": false }, { "id": 215100, + "text": "\"\"\"", "logprob": -2.6551573, - "text": "\"\"\"" + "special": false }, { "id": 46138, + "text": "Test", "logprob": -1.8059857, - "text": "Test" + "special": false }, { "id": 287, + "text": " the", "logprob": -1.2102449, - "text": "the" + "special": false }, { "id": 259, + "text": " ", "logprob": -1.6057279, - "text": "" + "special": false }, { "id": 49076, + "text": "contents", "logprob": -3.6060903, - "text": "contents" + "special": false }, { "id": 304, + "text": " of", "logprob": -0.5270343, - "text": "of" + "special": false }, { "id": 287, + "text": " the", "logprob": -0.62522805, - "text": "the" + "special": false }, { "id": 259, + "text": " ", "logprob": -1.4069618, - "text": "" + "special": false }, { "id": 49076, + "text": "contents", "logprob": -2.621994, - "text": "contents" + "special": false }, { "id": 304, + "text": " of", "logprob": -1.3172221, - "text": "of" + "special": false }, { "id": 287, + "text": " the", "logprob": -0.3501925, - "text": "the" + "special": false }, { "id": 259, + "text": " ", "logprob": -0.7219573, - "text": "" + "special": false }, { "id": 49076, + "text": "contents", "logprob": -1.0494149, - "text": "contents" + "special": false }, { "id": 260, + "text": ".", "logprob": -1.0803378, - "text": "." + "special": false }, { "id": 259, + "text": " ", "logprob": -0.32933083, - "text": "" + "special": false }, { "id": 215100, + "text": "\"\"\"", "logprob": -0.11268901, - "text": "\"\"\"" + "special": false }, { "id": 2978, + "text": " test", "logprob": -1.5846587, - "text": "test" + "special": false }, { "id": 290, + "text": "_", "logprob": -0.49796978, - "text": "_" + "special": false }, { "id": 4125, + "text": "test", "logprob": -2.0026445, - "text": "test" + "special": false } ] - }, - "generated_text": "\"\"\"Test the contents of the contents of the contents. 
\"\"\" test_test" + } } \ No newline at end of file diff --git a/proto/generate.proto b/proto/generate.proto index 0c4f9626..5081ce1c 100644 --- a/proto/generate.proto +++ b/proto/generate.proto @@ -34,12 +34,16 @@ message NextTokenChooserParameters { uint32 top_k = 2; /// restricting to top tokens summing to prob_cut_off <= prob_cut_off float top_p = 3; + /// restricting to top tokens summing to prob_cut_off <= prob_cut_off + float typical_p = 4; /// apply sampling on the logits - bool do_sample = 4; + bool do_sample = 5; /// random seed for sampling - uint64 seed = 5; + uint64 seed = 6; /// repetition penalty - float repetition_penalty = 6; + float repetition_penalty = 7; + /// token watermarking using "A Watermark for Large Language Models" + bool watermark = 8; } message StoppingCriteriaParameters { @@ -54,12 +58,10 @@ message Request { uint64 id = 1; /// The generation context string inputs = 2; - /// The number of tokens inside inputs - uint32 input_length = 3; /// Next Token Chooser Parameters - NextTokenChooserParameters parameters = 4; + NextTokenChooserParameters parameters = 3; /// Stopping Criteria Parameters - StoppingCriteriaParameters stopping_parameters = 5; + StoppingCriteriaParameters stopping_parameters = 4; } message Batch { @@ -108,8 +110,10 @@ message Generation { float token_logprob = 4; /// Text string token_text = 5; + /// Is it a special token + bool token_is_special = 6; /// Complete generated text - GeneratedText generated_text = 6; + GeneratedText generated_text = 7; } message PrefillRequest { diff --git a/router/Cargo.toml b/router/Cargo.toml index f1ace790..292e6219 100644 --- a/router/Cargo.toml +++ b/router/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-router" -version = "0.2.1" +version = "0.4.0" edition = "2021" authors = ["Olivier Dehaene"] description = "Text Generation Webserver" @@ -19,17 +19,21 @@ axum-tracing-opentelemetry = "0.9.0" text-generation-client = { path = "client" } clap = { version = "4.1.4", features = ["derive", "env"] } futures = "0.3.26" +metrics = "0.20.1" +metrics-exporter-prometheus = { version = "0.11.0", features = [] } nohash-hasher = "0.2.0" opentelemetry = { version = "0.18.0", features = ["rt-tokio"] } opentelemetry-otlp = "0.11.0" parking_lot = "0.12.1" rand = "0.8.5" +reqwest = { version = "0.11.14", features = [] } serde = "1.0.152" serde_json = "1.0.93" thiserror = "1.0.38" tokenizers = "0.13.2" tokio = { version = "1.25.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] } tokio-stream = "0.1.11" +tower-http = { version = "0.3.5", features = ["cors"] } tracing = "0.1.37" tracing-opentelemetry = "0.18.0" tracing-subscriber = { version = "0.3.16", features = ["json", "env-filter"] } diff --git a/router/client/Cargo.toml b/router/client/Cargo.toml index 7c7ed48e..fd43b63b 100644 --- a/router/client/Cargo.toml +++ b/router/client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-generation-client" -version = "0.2.1" +version = "0.4.0" edition = "2021" [dependencies] diff --git a/router/grpc-metadata/Cargo.toml b/router/grpc-metadata/Cargo.toml index 311092e3..f8676d67 100644 --- a/router/grpc-metadata/Cargo.toml +++ b/router/grpc-metadata/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "grpc-metadata" -version = "0.1.0" +version = "0.4.0" edition = "2021" [dependencies] diff --git a/router/src/infer.rs b/router/src/infer.rs index 4e368492..ae151d8a 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -1,9 +1,9 @@ /// Batching and inference logic use crate::validation::{Validation, 
ValidationError};
-use crate::GenerateRequest;
 use crate::{Entry, Queue, Token};
+use crate::{GenerateRequest, PrefillToken};
+use futures::future::try_join_all;
 use nohash_hasher::IntMap;
-use std::future::Future;
 use std::sync::Arc;
 use text_generation_client::{
     Batch, ClientError, GeneratedText, Generation, PrefillTokens, ShardedClient,
@@ -81,6 +81,7 @@ impl Infer {
             .limit_concurrent_requests
             .try_acquire_owned()
             .map_err(|err| {
+                metrics::increment_counter!("tgi_request_failure", "err" => "overloaded");
                 tracing::error!("{err}");
                 err
             })?;
@@ -138,7 +139,7 @@ impl Infer {
                     .into_iter()
                     .zip(tokens.logprobs.into_iter())
                     .zip(tokens.texts.into_iter())
-                    .map(|((id, logprob), text)| Token { id, text, logprob })
+                    .map(|((id, logprob), text)| PrefillToken { id, text, logprob })
                     .collect();
             }
             // Push last token
@@ -172,10 +173,48 @@ impl Infer {
             })
         } else {
             let err = InferError::IncompleteGeneration;
+            metrics::increment_counter!("tgi_request_failure", "err" => "incomplete");
             tracing::error!("{err}");
             Err(err)
         }
     }
+
+    /// Add best_of new requests to the queue and return an InferResponse of the sequence with
+    /// the highest log probability per token
+    #[instrument(skip(self))]
+    pub(crate) async fn generate_best_of(
+        &self,
+        request: GenerateRequest,
+        best_of: usize,
+    ) -> Result<(InferResponse, Vec<InferResponse>), InferError> {
+        // validate best_of parameter separately
+        let best_of = self.validation.validate_best_of(best_of)?;
+
+        // create multiple generate requests
+        let mut infer_responses: Vec<InferResponse> =
+            try_join_all((0..best_of).map(|_| self.generate(request.clone()))).await?;
+
+        // get the sequence with the highest log probability per token
+        let mut max_index = 0;
+        let mut max_logprob: f32 = f32::MIN;
+
+        for (i, response) in infer_responses.iter().enumerate() {
+            // mean logprobs of the generated tokens
+            let sequence_logprob = response
+                .tokens
+                .iter()
+                .map(|token| token.logprob)
+                .sum::<f32>()
+                / response.tokens.len() as f32;
+
+            // set best sequence
+            if sequence_logprob > max_logprob {
+                max_index = i;
+                max_logprob = sequence_logprob;
+            }
+        }
+        let best_response = infer_responses.remove(max_index);
+        Ok((best_response, infer_responses))
+    }
 }

 /// Batching logic
@@ -190,7 +229,11 @@ async fn batching_task(
     shared: Arc<Shared>,
 ) {
     // Minimum batch size after which we try to add more requests
-    let limit_min_batch_size = (max_batch_size / 2) as u32;
+    let limit_min_batch_size = if max_batch_size > 1 {
+        (max_batch_size / 2) as u32
+    } else {
+        0
+    };

     // Infinite loop
     loop {
@@ -201,7 +244,7 @@ async fn batching_task(
         // This batch might be smaller than the maximum batch size if there are not enough requests
         // waiting in the queue
         while let Some((mut entries, batch, span)) = queue.next_batch(None, max_batch_size).await {
-            let mut cached_batch = wrap_future(client.prefill(batch), &mut entries)
+            let mut cached_batch = prefill(&mut client, batch, &mut entries)
                 .instrument(span)
                 .await;
             let mut waiting_tokens = 1;
@@ -212,6 +255,7 @@ async fn batching_task(
                 // Get current batch info
                 let batch_size = batch.size;
                 let mut batches = vec![batch];
+                metrics::gauge!("tgi_batch_current_size", batch_size as f64);

                 // If the current batch is too small, we try to add more requests to it
                 if batch_size <= limit_min_batch_size {
@@ -234,17 +278,17 @@ async fn batching_task(
                             // because a new batch is being computed
                             let entry_waiting_span = info_span!(parent: &entry.span, "waiting", batch_size = new_batch_size);
-                            // Add relationship
+                            // Add relationships
+                            span.follows_from(&entry_waiting_span);
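+                            // (The link is now recorded in both directions: the batch span
+                            // points at each entry's waiting span, and each waiting span
+                            // below still points back at the batch span.)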
                             entry_waiting_span.follows_from(&span);
                             // Update entry
                             entry.temp_span = Some(entry_waiting_span);
                         });

                         // Generate one token for this new batch to have the attention past in cache
-                        let new_cached_batch =
-                            wrap_future(client.prefill(new_batch), &mut new_entries)
-                                .instrument(span)
-                                .await;
+                        let new_cached_batch = prefill(&mut client, new_batch, &mut new_entries)
+                            .instrument(span)
+                            .await;
                         // Reset waiting counter
                         waiting_tokens = 1;
                         // Extend current batch with the new batch
@@ -262,35 +306,66 @@ async fn batching_task(
                     // Create a new span to link the batch back to this entry
                     let entry_batch_span = info_span!(parent: &entry.span, "infer", batch_size = next_batch_size);
-                    // Add relationship
+                    // Add relationships
+                    next_batch_span.follows_from(&entry_batch_span);
                     entry_batch_span.follows_from(&next_batch_span);
                     // Update entry
                     entry.temp_span = Some(entry_batch_span);
                 });

-                cached_batch = wrap_future(client.decode(batches), &mut entries)
+                cached_batch = decode(&mut client, batches, &mut entries)
                     .instrument(next_batch_span)
                     .await;
                 waiting_tokens += 1;
             }
+            metrics::gauge!("tgi_batch_current_size", 0.0);
         }
     }
 }

-/// Wrap a future inside a match statement to handle errors and send the responses to Infer
 #[instrument(skip_all)]
-async fn wrap_future(
-    future: impl Future<Output = Result<(Vec<Generation>, Option<Batch>), ClientError>>,
+async fn prefill(
+    client: &mut ShardedClient,
+    batch: Batch,
     entries: &mut IntMap<u64, Entry>,
 ) -> Option<Batch> {
-    match future.await {
+    let start_time = Instant::now();
+
+    match client.prefill(batch).await {
         Ok((generations, next_batch)) => {
             send_generations(generations, entries);
+            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed(), "method" => "prefill");
+            metrics::increment_counter!("tgi_batch_inference_success", "method" => "prefill");
             next_batch
         }
         // If we have an error, we discard the whole batch
         Err(err) => {
             send_errors(err, entries);
+            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "prefill");
+            None
+        }
+    }
+}
+
+#[instrument(skip_all)]
+async fn decode(
+    client: &mut ShardedClient,
+    batches: Vec<Batch>,
+    entries: &mut IntMap<u64, Entry>,
+) -> Option<Batch> {
+    let start_time = Instant::now();
+
+    match client.decode(batches).await {
+        Ok((generations, next_batch)) => {
+            send_generations(generations, entries);
+            metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed(), "method" => "decode");
+            metrics::increment_counter!("tgi_batch_inference_success", "method" => "decode");
+            next_batch
+        }
+        // If we have an error, we discard the whole batch
+        Err(err) => {
+            send_errors(err, entries);
+            metrics::increment_counter!("tgi_batch_inference_failure", "method" => "decode");
             None
         }
     }
 }
@@ -303,6 +378,7 @@ fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
         // Create and enter a span to link this function back to the entry
         let _send_error_span = info_span!(parent: entry.temp_span.as_ref().expect("batch_span is None. This is a bug."), "send_error").entered();
         let err = InferError::GenerationError(error.to_string());
+        metrics::increment_counter!("tgi_request_failure", "err" => "generation");
         tracing::error!("{err}");
        // unwrap_or is valid here as we don't care if the receiver is gone. 
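The selection rule in `generate_best_of` above is worth spelling out: candidates are ranked by mean per-token log probability rather than the total, and the arg-max wins. Below is a minimal, self-contained sketch of that rule, with plain `Vec<f32>` logprob lists standing in for `InferResponse`; `select_best` and the small `main` harness are illustrative names, not part of this patch.

    // Standalone illustration of the `best_of` ranking used above: keep the
    // candidate whose tokens have the highest mean log probability.
    fn select_best(mut candidates: Vec<Vec<f32>>) -> Option<Vec<f32>> {
        if candidates.is_empty() {
            return None;
        }
        let mut max_index = 0;
        let mut max_logprob = f32::MIN;
        for (i, logprobs) in candidates.iter().enumerate() {
            // Averaging normalizes for length, so longer sequences are not
            // punished for accumulating more negative logprob mass.
            let mean = logprobs.iter().sum::<f32>() / logprobs.len() as f32;
            // Strict `>` keeps the earliest candidate on ties, as in the diff.
            if mean > max_logprob {
                max_index = i;
                max_logprob = mean;
            }
        }
        Some(candidates.swap_remove(max_index))
    }

    fn main() {
        let best = select_best(vec![
            vec![-0.9, -1.2, -0.5], // mean ~ -0.87
            vec![-0.2, -0.3],       // mean = -0.25, selected
        ]);
        assert_eq!(best, Some(vec![-0.2, -0.3]));
    }

Two properties carry over from the real method: the strict comparison keeps the lowest-index candidate on ties, and the non-winning responses remain available, which is what feeds `best_of_sequences` in the details.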
@@ -340,6 +416,7 @@ fn send_generations(generations: Vec, entries: &mut IntMap, + pub(crate) prefill: Vec, pub(crate) tokens: Vec, pub(crate) generated_text: GeneratedText, pub(crate) queued: Instant, @@ -406,3 +483,14 @@ pub enum InferError { #[error("Incomplete generation")] IncompleteGeneration, } + +impl InferError { + pub(crate) fn error_type(&self) -> &str { + match self { + InferError::GenerationError(_) => "generation", + InferError::Overloaded(_) => "overloaded", + InferError::ValidationError(_) => "validation", + InferError::IncompleteGeneration => "incomplete_generation", + } + } +} diff --git a/router/src/lib.rs b/router/src/lib.rs index 8e3199dd..91b4417c 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -12,6 +12,9 @@ use validation::Validation; #[derive(Clone, Debug, Deserialize, ToSchema)] pub(crate) struct GenerateParameters { + #[serde(default)] + #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)] + pub best_of: Option, #[serde(default)] #[schema( exclusive_minimum = 0.0, @@ -40,39 +43,64 @@ pub(crate) struct GenerateParameters { example = 0.95 )] pub top_p: Option, - #[serde(default = "default_do_sample")] + #[serde(default)] + #[schema( + exclusive_minimum = 0.0, + maximum = 1.0, + nullable = true, + default = "null", + example = 0.95 + )] + pub typical_p: Option, + #[serde(default)] #[schema(default = "false", example = true)] pub do_sample: bool, #[serde(default = "default_max_new_tokens")] #[schema(exclusive_minimum = 0, exclusive_maximum = 512, default = "20")] pub max_new_tokens: u32, #[serde(default)] - #[schema(inline, max_items = 4, example = json!(["photographer"]))] + #[schema(nullable = true, default = "null", example = false)] + pub return_full_text: Option, + #[serde(default)] + #[schema(inline, max_items = 4, example = json ! 
(["photographer"]))] pub stop: Vec, #[serde(default)] + #[schema(nullable = true, default = "null", example = "null")] + pub truncate: Option, + #[serde(default)] + #[schema(default = "false", example = true)] + pub watermark: bool, + #[serde(default)] #[schema(default = "true")] pub details: bool, #[serde(default)] + #[schema( + exclusive_minimum = 0, + nullable = true, + default = "null", + example = "null" + )] pub seed: Option, } -fn default_do_sample() -> bool { - false -} - fn default_max_new_tokens() -> u32 { 20 } fn default_parameters() -> GenerateParameters { GenerateParameters { + best_of: None, temperature: None, repetition_penalty: None, top_k: None, top_p: None, - do_sample: default_do_sample(), + typical_p: None, + do_sample: false, max_new_tokens: default_max_new_tokens(), - stop: vec![], + return_full_text: None, + stop: Vec::new(), + truncate: None, + watermark: false, details: false, seed: None, } @@ -86,14 +114,46 @@ pub(crate) struct GenerateRequest { pub parameters: GenerateParameters, } +#[derive(Clone, Debug, Deserialize, ToSchema)] +pub(crate) struct CompatGenerateRequest { + #[schema(example = "My name is Olivier and I")] + pub inputs: String, + #[serde(default = "default_parameters")] + pub parameters: GenerateParameters, + #[serde(default)] + #[allow(dead_code)] + pub stream: bool, +} + +impl From for GenerateRequest { + fn from(req: CompatGenerateRequest) -> Self { + Self { + inputs: req.inputs, + parameters: req.parameters, + } + } +} + +#[derive(Debug, Serialize, ToSchema)] +pub struct PrefillToken { + #[schema(example = 0)] + id: u32, + #[schema(example = "test")] + text: String, + #[schema(nullable = true, example = - 0.34)] + logprob: f32, +} + #[derive(Debug, Serialize, ToSchema)] pub struct Token { #[schema(example = 0)] id: u32, #[schema(example = "test")] text: String, - #[schema(nullable = true, example = -0.34)] + #[schema(nullable = true, example = - 0.34)] logprob: f32, + #[schema(example = "false")] + special: bool, } #[derive(Serialize, ToSchema)] @@ -108,16 +168,32 @@ pub(crate) enum FinishReason { StopSequence, } +#[derive(Serialize, ToSchema)] +pub(crate) struct BestOfSequence { + #[schema(example = "test")] + pub generated_text: String, + #[schema(example = "length")] + pub finish_reason: FinishReason, + #[schema(example = 1)] + pub generated_tokens: u32, + #[schema(nullable = true, example = 42)] + pub seed: Option, + pub prefill: Vec, + pub tokens: Vec, +} + #[derive(Serialize, ToSchema)] pub(crate) struct Details { #[schema(example = "length")] pub finish_reason: FinishReason, #[schema(example = 1)] pub generated_tokens: u32, - #[schema(example = 42)] + #[schema(nullable = true, example = 42)] pub seed: Option, - pub prefill: Option>, - pub tokens: Option>, + pub prefill: Vec, + pub tokens: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub best_of_sequences: Option>, } #[derive(Serialize, ToSchema)] @@ -134,7 +210,7 @@ pub(crate) struct StreamDetails { pub finish_reason: FinishReason, #[schema(example = 1)] pub generated_tokens: u32, - #[schema(example = 42)] + #[schema(nullable = true, example = 42)] pub seed: Option, } @@ -149,6 +225,6 @@ pub(crate) struct StreamResponse { #[derive(Serialize, ToSchema)] pub(crate) struct ErrorResponse { - #[schema(inline)] pub error: String, + pub error_type: String, } diff --git a/router/src/main.rs b/router/src/main.rs index 881d94fb..2ccf66b3 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -1,4 +1,5 @@ /// Text Generation Inference webserver entrypoint +use 
axum::http::HeaderValue; use clap::Parser; use opentelemetry::sdk::propagation::TraceContextPropagator; use opentelemetry::sdk::trace; @@ -7,9 +8,11 @@ use opentelemetry::sdk::Resource; use opentelemetry::{global, KeyValue}; use opentelemetry_otlp::WithExportConfig; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::path::Path; use text_generation_client::ShardedClient; use text_generation_router::server; use tokenizers::Tokenizer; +use tower_http::cors::AllowOrigin; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::{EnvFilter, Layer}; @@ -20,8 +23,14 @@ use tracing_subscriber::{EnvFilter, Layer}; struct Args { #[clap(default_value = "128", long, env)] max_concurrent_requests: usize, + #[clap(default_value = "2", long, env)] + max_best_of: usize, + #[clap(default_value = "4", long, env)] + max_stop_sequences: usize, #[clap(default_value = "1000", long, env)] max_input_length: usize, + #[clap(default_value = "1512", long, env)] + max_total_tokens: usize, #[clap(default_value = "32", long, env)] max_batch_size: usize, #[clap(default_value = "20", long, env)] @@ -38,6 +47,8 @@ struct Args { json_output: bool, #[clap(long, env)] otlp_endpoint: Option, + #[clap(long, env)] + cors_allow_origin: Option>, } fn main() -> Result<(), std::io::Error> { @@ -46,7 +57,10 @@ fn main() -> Result<(), std::io::Error> { // Pattern match configuration let Args { max_concurrent_requests, + max_best_of, + max_stop_sequences, max_input_length, + max_total_tokens, max_batch_size, max_waiting_tokens, port, @@ -55,17 +69,37 @@ fn main() -> Result<(), std::io::Error> { validation_workers, json_output, otlp_endpoint, + cors_allow_origin, } = args; if validation_workers == 0 { panic!("validation_workers must be > 0"); } - // Download and instantiate tokenizer + // CORS allowed origins + // map to go inside the option and then map to parse from String to HeaderValue + // Finally, convert to AllowOrigin + let cors_allow_origin: Option = cors_allow_origin.map(|cors_allow_origin| { + AllowOrigin::list( + cors_allow_origin + .iter() + .map(|origin| origin.parse::().unwrap()), + ) + }); + + // Tokenizer instance // This will only be used to validate payloads - // - // We need to download it outside of the Tokio runtime - let tokenizer = Tokenizer::from_pretrained(tokenizer_name, None).unwrap(); + let local_path = Path::new(&tokenizer_name); + let tokenizer = + if local_path.exists() && local_path.is_dir() && local_path.join("tokenizer.json").exists() + { + // Load local tokenizer + Tokenizer::from_file(local_path.join("tokenizer.json")).unwrap() + } else { + // Download and instantiate tokenizer + // We need to download it outside of the Tokio runtime + Tokenizer::from_pretrained(tokenizer_name.clone(), None).unwrap() + }; // Launch Tokio runtime tokio::runtime::Builder::new_multi_thread() @@ -75,6 +109,27 @@ fn main() -> Result<(), std::io::Error> { .block_on(async { init_logging(otlp_endpoint, json_output); + // Get pipeline tag + let model_info = reqwest::get(format!( + "https://huggingface.co/api/models/{tokenizer_name}" + )) + .await + .expect("Could not connect to hf.co") + .text() + .await + .expect("error when retrieving model info from hf.co"); + let model_info: serde_json::Value = + serde_json::from_str(&model_info).expect("unable to parse model info"); + + // if pipeline-tag == text-generation we default to return_full_text = true + let compat_return_full_text = match model_info.get("pipeline_tag") { + None => { + tracing::warn!("no pipeline 
tag found for model {tokenizer_name}"); + false + } + Some(pipeline_tag) => pipeline_tag.as_str() == Some("text-generation"), + }; + // Instantiate sharded client from the master unix socket let mut sharded_client = ShardedClient::connect_uds(master_shard_uds_path) .await @@ -91,14 +146,19 @@ fn main() -> Result<(), std::io::Error> { // Run server server::run( + compat_return_full_text, max_concurrent_requests, + max_best_of, + max_stop_sequences, max_input_length, + max_total_tokens, max_batch_size, max_waiting_tokens, sharded_client, tokenizer, validation_workers, addr, + cors_allow_origin, ) .await; Ok(()) diff --git a/router/src/queue.rs b/router/src/queue.rs index b155a1af..df2087e1 100644 --- a/router/src/queue.rs +++ b/router/src/queue.rs @@ -132,6 +132,7 @@ impl State { // Push entry in the queue self.entries.push((self.next_id, entry)); self.next_id += 1; + metrics::increment_gauge!("tgi_queue_size", 1.0); } // Get the next batch @@ -164,7 +165,8 @@ impl State { // Create a new span to link the batch back to this entry let entry_batch_span = info_span!(parent: &entry.span, "infer", batch_size = next_batch_size); - // Add relationship + // Add relationships + next_batch_span.follows_from(&entry_batch_span); entry_batch_span.follows_from(&next_batch_span); // Update entry entry.temp_span = Some(entry_batch_span); @@ -172,7 +174,6 @@ impl State { batch_requests.push(Request { id, inputs: entry.request.inputs.clone(), - input_length: entry.request.input_length, parameters: Some(entry.request.parameters.clone()), stopping_parameters: Some(entry.request.stopping_parameters.clone()), }); @@ -190,6 +191,8 @@ impl State { // Increment batch id self.next_batch_id += 1; + metrics::gauge!("tgi_queue_size", self.entries.len() as f64); + metrics::histogram!("tgi_batch_next_size", batch.size as f64); Some((batch_entries, batch, next_batch_span)) } } @@ -223,14 +226,15 @@ mod tests { Entry { request: ValidGenerateRequest { inputs: "".to_string(), - input_length: 0, parameters: NextTokenChooserParameters { temperature: 0.0, top_k: 0, top_p: 0.0, + typical_p: 0.0, do_sample: false, seed: 0, repetition_penalty: 0.0, + watermark: false, }, stopping_parameters: StoppingCriteriaParameters { max_new_tokens: 0, diff --git a/router/src/server.rs b/router/src/server.rs index 432586bb..3b63ec8a 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1,17 +1,20 @@ /// HTTP Server logic -use crate::infer::{InferError, InferStreamResponse}; +use crate::infer::{InferError, InferResponse, InferStreamResponse}; +use crate::validation::ValidationError; use crate::{ - Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest, GenerateResponse, - Infer, StreamDetails, StreamResponse, Token, Validation, + BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason, + GenerateParameters, GenerateRequest, GenerateResponse, Infer, PrefillToken, StreamDetails, + StreamResponse, Token, Validation, }; use axum::extract::Extension; -use axum::http::{HeaderMap, StatusCode}; +use axum::http::{HeaderMap, Method, StatusCode}; use axum::response::sse::{Event, KeepAlive, Sse}; -use axum::response::IntoResponse; +use axum::response::{IntoResponse, Response}; use axum::routing::{get, post}; -use axum::{Json, Router}; +use axum::{http, Json, Router}; use axum_tracing_opentelemetry::opentelemetry_tracing_layer; use futures::Stream; +use metrics_exporter_prometheus::{PrometheusBuilder, PrometheusHandle}; use std::convert::Infallible; use std::net::SocketAddr; use 
text_generation_client::ShardedClient; @@ -19,29 +22,61 @@ use tokenizers::Tokenizer; use tokio::signal; use tokio::time::Instant; use tokio_stream::StreamExt; +use tower_http::cors::{AllowOrigin, CorsLayer}; use tracing::{info_span, instrument, Instrument}; use utoipa::OpenApi; use utoipa_swagger_ui::SwaggerUi; +/// Compatibility route with api-inference and AzureML +#[instrument(skip(infer))] +async fn compat_generate( + default_return_full_text: Extension, + infer: Extension, + req: Json, +) -> Result)> { + let mut req = req.0; + + // default return_full_text given the pipeline_tag + if req.parameters.return_full_text.is_none() { + req.parameters.return_full_text = Some(default_return_full_text.0) + } + + // switch on stream + if req.stream { + Ok(generate_stream(infer, Json(req.into())) + .await + .into_response()) + } else { + let (headers, generation) = generate(infer, Json(req.into())).await?; + // wrap generation inside a Vec to match api-inference + Ok((headers, Json(vec![generation.0])).into_response()) + } +} + /// Health check method #[instrument(skip(infer))] async fn health(infer: Extension) -> Result<(), (StatusCode, Json)> { // TODO: while this is the best health check we can do, it is a bit on the heavy side and might // be a bit too slow for a health check. - // What we should do instead if check if the gRPC channels are still healthy. + // What we should do instead is check if the gRPC channels are still healthy. // Send a small inference request infer .generate(GenerateRequest { inputs: "liveness".to_string(), parameters: GenerateParameters { + best_of: None, temperature: None, repetition_penalty: None, top_k: None, top_p: None, + typical_p: None, do_sample: false, max_new_tokens: 1, + return_full_text: None, stop: Vec::new(), + truncate: None, + watermark: false, details: false, seed: None, }, @@ -57,15 +92,15 @@ async fn health(infer: Extension) -> Result<(), (StatusCode, Json) -> Result<(), (StatusCode, Json, req: Json, -) -> Result)> { +) -> Result<(HeaderMap, Json), (StatusCode, Json)> { let span = tracing::Span::current(); let start_time = Instant::now(); - // Inference + let compute_characters = req.0.inputs.chars().count(); + let mut add_prompt = None; + if req.0.parameters.return_full_text.unwrap_or(false) { + add_prompt = Some(req.0.inputs.clone()); + } + let details = req.0.parameters.details; - let response = infer.generate(req.0).await?; + + // Inference + let (response, best_of_responses) = match req.0.parameters.best_of { + Some(best_of) if best_of > 1 => { + let (response, best_of_responses) = infer.generate_best_of(req.0, best_of).await?; + (response, Some(best_of_responses)) + } + _ => (infer.generate(req.0).await?, None), + }; // Token details let details = match details { - true => Some(Details { - finish_reason: FinishReason::from(response.generated_text.finish_reason), - generated_tokens: response.generated_text.generated_tokens, - prefill: Some(response.prefill), - tokens: Some(response.tokens), - seed: response.generated_text.seed, - }), + true => { + // convert best_of_responses + let best_of_sequences = best_of_responses.map(|responses: Vec| { + responses + .into_iter() + .map(|response: InferResponse| { + // Add prompt if return_full_text + let mut output_text = response.generated_text.text; + if let Some(prompt) = &add_prompt { + output_text = prompt.clone() + &output_text; + } + + BestOfSequence { + generated_text: output_text, + finish_reason: FinishReason::from( + response.generated_text.finish_reason, + ), + generated_tokens: 
response.generated_text.generated_tokens, + prefill: response.prefill, + tokens: response.tokens, + seed: response.generated_text.seed, + } + }) + .collect() + }); + + Some(Details { + finish_reason: FinishReason::from(response.generated_text.finish_reason), + generated_tokens: response.generated_text.generated_tokens, + prefill: response.prefill, + tokens: response.tokens, + seed: response.generated_text.seed, + best_of_sequences, + }) + } false => None, }; @@ -111,6 +187,15 @@ async fn generate( // Headers let mut headers = HeaderMap::new(); + headers.insert("x-compute-type", "gpu+optimized".parse().unwrap()); + headers.insert( + "x-compute-time", + total_time.as_millis().to_string().parse().unwrap(), + ); + headers.insert( + "x-compute-characters", + compute_characters.to_string().parse().unwrap(), + ); headers.insert( "x-total-time", total_time.as_millis().to_string().parse().unwrap(), @@ -141,9 +226,26 @@ async fn generate( span.record("seed", format!("{:?}", response.generated_text.seed)); tracing::info!("Output: {}", response.generated_text.text); + // Metrics + metrics::increment_counter!("tgi_request_success"); + metrics::histogram!("tgi_request_duration", total_time); + metrics::histogram!("tgi_request_validation_duration", validation_time); + metrics::histogram!("tgi_request_queue_duration", queue_time); + metrics::histogram!("tgi_request_inference_duration", inference_time); + metrics::histogram!("tgi_request_mean_time_per_token_duration", time_per_token); + metrics::histogram!( + "tgi_request_generated_tokens", + response.generated_text.generated_tokens as f64 + ); + // Send response + let mut output_text = response.generated_text.text; + if let Some(prompt) = add_prompt { + output_text = prompt + &output_text; + } + let response = GenerateResponse { - generated_text: response.generated_text.text, + generated_text: output_text, details, }; Ok((headers, Json(response))) @@ -156,20 +258,20 @@ async fn generate( path = "/generate_stream", request_body = GenerateRequest, responses( - (status = 200, description = "Generated Text", body = [StreamResponse], - content_type="text/event-stream "), - (status = 424, description = "Generation Error", body = [ErrorResponse], - example = json!({"error": "Request failed during generation"}), - content_type="text/event-stream "), - (status = 429, description = "Model is overloaded", body = [ErrorResponse], - example = json!({"error": "Model is overloaded"}), - content_type="text/event-stream "), - (status = 422, description = "Input validation error", body = [ErrorResponse], - example = json!({"error": "Input validation error"}), - content_type="text/event-stream "), - (status = 500, description = "Incomplete generation", body = [ErrorResponse], - example = json!({"error": "Incomplete generation"}), - content_type="text/event-stream "), + (status = 200, description = "Generated Text", body = StreamResponse, + content_type = "text/event-stream"), + (status = 424, description = "Generation Error", body = ErrorResponse, + example = json ! ({"error": "Request failed during generation"}), + content_type = "text/event-stream"), + (status = 429, description = "Model is overloaded", body = ErrorResponse, + example = json ! ({"error": "Model is overloaded"}), + content_type = "text/event-stream"), + (status = 422, description = "Input validation error", body = ErrorResponse, + example = json ! 
({"error": "Input validation error"}), + content_type = "text/event-stream"), + (status = 500, description = "Incomplete generation", body = ErrorResponse, + example = json ! ({"error": "Incomplete generation"}), + content_type = "text/event-stream"), ) )] #[instrument( @@ -186,118 +288,177 @@ async fn generate( async fn generate_stream( infer: Extension, req: Json, -) -> Sse>> { +) -> ( + HeaderMap, + Sse>>, +) { let span = tracing::Span::current(); let start_time = Instant::now(); + let compute_characters = req.0.inputs.chars().count(); + + let mut headers = HeaderMap::new(); + headers.insert("x-compute-type", "gpu+optimized".parse().unwrap()); + headers.insert( + "x-compute-characters", + compute_characters.to_string().parse().unwrap(), + ); + let stream = async_stream::stream! { // Inference let mut end_reached = false; let mut error = false; + + let mut add_prompt = None; + if req.0.parameters.return_full_text.unwrap_or(false) { + add_prompt = Some(req.0.inputs.clone()); + } let details = req.0.parameters.details; - match infer.generate_stream(req.0).instrument(info_span!(parent: &span, "async_stream")).await { - Ok(mut response_stream) => { - // Server-Sent Event stream - while let Some(response) = response_stream.next().await { - match response { - Ok(response) => { - match response { - // Prefill is ignored - InferStreamResponse::Prefill(_) => {} - // Yield event for every new token - InferStreamResponse::Token(token) => { - // StreamResponse - let stream_token = StreamResponse { + let best_of = req.0.parameters.best_of.unwrap_or(1); + if best_of == 1 { + match infer.generate_stream(req.0).instrument(info_span!(parent: &span, "async_stream")).await { + Ok(mut response_stream) => { + // Server-Sent Event stream + while let Some(response) = response_stream.next().await { + match response { + Ok(response) => { + match response { + // Prefill is ignored + InferStreamResponse::Prefill(_) => {} + // Yield event for every new token + InferStreamResponse::Token(token) => { + // StreamResponse + let stream_token = StreamResponse { + token, + generated_text: None, + details: None, + }; + + yield Ok(Event::default().json_data(stream_token).unwrap()) + } + // Yield event for last token and compute timings + InferStreamResponse::End { token, - generated_text: None, - details: None, - }; + generated_text, + start, + queued, + } => { + // Token details + let details = match details { + true => Some(StreamDetails { + finish_reason: FinishReason::from(generated_text.finish_reason), + generated_tokens: generated_text.generated_tokens, + seed: generated_text.seed, + }), + false => None, + }; - yield Ok(Event::default().json_data(stream_token).unwrap()) - } - // Yield event for last token and compute timings - InferStreamResponse::End { - token, - generated_text, - start, - queued, - } => { - // Token details - let details = match details { - true => Some(StreamDetails { - finish_reason: FinishReason::from(generated_text.finish_reason), - generated_tokens: generated_text.generated_tokens, - seed: generated_text.seed, - }), - false => None, - }; + // Timings + let total_time = start_time.elapsed(); + let validation_time = queued - start_time; + let queue_time = start - queued; + let inference_time = Instant::now() - start; + let time_per_token = inference_time / generated_text.generated_tokens; - // Timings - let total_time = start_time.elapsed(); - let validation_time = queued - start_time; - let queue_time = start - queued; - let inference_time = Instant::now() - start; - let time_per_token = 
inference_time / generated_text.generated_tokens; + // Tracing metadata + span.record("total_time", format!("{total_time:?}")); + span.record("validation_time", format!("{validation_time:?}")); + span.record("queue_time", format!("{queue_time:?}")); + span.record("inference_time", format!("{inference_time:?}")); + span.record("time_per_token", format!("{time_per_token:?}")); + span.record("seed", format!("{:?}", generated_text.seed)); + tracing::info!(parent: &span, "Output: {}", generated_text.text); - // Tracing metadata - span.record("total_time", format!("{:?}", total_time)); - span.record("validation_time", format!("{:?}", validation_time)); - span.record("queue_time", format!("{:?}", queue_time)); - span.record("inference_time", format!("{:?}", inference_time)); - span.record("time_per_token", format!("{:?}", time_per_token)); - span.record("seed", format!("{:?}", generated_text.seed)); - tracing::info!(parent: &span, "Output: {}", generated_text.text); + // Metrics + metrics::increment_counter!("tgi_request_success"); + metrics::histogram!("tgi_request_duration", total_time); + metrics::histogram!("tgi_request_validation_duration", validation_time); + metrics::histogram!("tgi_request_queue_duration", queue_time); + metrics::histogram!("tgi_request_inference_duration", inference_time); + metrics::histogram!("tgi_request_mean_time_per_token_duration", time_per_token); + metrics::histogram!("tgi_request_generated_tokens", generated_text.generated_tokens as f64); - // StreamResponse - end_reached = true; - let stream_token = StreamResponse { - token, - generated_text: Some(generated_text.text), - details - }; + // StreamResponse + end_reached = true; - yield Ok(Event::default().json_data(stream_token).unwrap()) + let mut output_text = generated_text.text; + if let Some(prompt) = add_prompt { + output_text = prompt + &output_text; + } + + let stream_token = StreamResponse { + token, + generated_text: Some(output_text), + details + }; + + yield Ok(Event::default().json_data(stream_token).unwrap()); + break; + } } } - } - // yield error - Err(err) => { - error = true; - yield Ok(Event::from(err)) + // yield error + Err(err) => { + error = true; + yield Ok(Event::from(err)); + break; + } } } + }, + // yield error + Err(err) => { + error = true; + yield Ok(Event::from(err)); } - }, - // yield error - Err(err) => { - error = true; - yield Ok(Event::from(err)) } - } - // Check if generation reached the end - // Skip if we already sent an error - if !end_reached && !error { - let err = InferError::IncompleteGeneration; + // Check if generation reached the end + // Skip if we already sent an error + if !end_reached && !error { + let err = InferError::IncompleteGeneration; + metrics::increment_counter!("tgi_request_failure", "err" => "incomplete"); + tracing::error!("{err}"); + yield Ok(Event::from(err)); + } + } else { + let err = InferError::from(ValidationError::BestOfStream); + metrics::increment_counter!("tgi_request_failure", "err" => "validation"); tracing::error!("{err}"); - yield Ok(Event::from(err)) + yield Ok(Event::from(err)); } }; - Sse::new(stream).keep_alive(KeepAlive::default()) + (headers, Sse::new(stream).keep_alive(KeepAlive::default())) +} + +/// Prometheus metrics scrape endpoint +#[utoipa::path( + get, + tag = "Text Generation Inference", + path = "/metrics", + responses((status = 200, description = "Prometheus Metrics", body = String)) +)] +async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String { + prom_handle.render() } /// Serving method #[allow(clippy::too_many_arguments)] 
pub async fn run( + compat_return_full_text: bool, max_concurrent_requests: usize, + max_best_of: usize, + max_stop_sequences: usize, max_input_length: usize, + max_total_tokens: usize, max_batch_size: usize, max_waiting_tokens: usize, client: ShardedClient, tokenizer: Tokenizer, validation_workers: usize, addr: SocketAddr, + allow_origin: Option<AllowOrigin>, ) { // OpenAPI documentation #[derive(OpenApi)] @@ -305,13 +466,16 @@ pub async fn run( paths( generate, generate_stream, + metrics, ), components( schemas( GenerateRequest, GenerateParameters, + PrefillToken, Token, GenerateResponse, + BestOfSequence, Details, FinishReason, StreamResponse, @@ -333,7 +497,14 @@ pub async fn run( struct ApiDoc; // Create state - let validation = Validation::new(validation_workers, tokenizer, max_input_length); + let validation = Validation::new( + validation_workers, + tokenizer, + max_best_of, + max_stop_sequences, + max_input_length, + max_total_tokens, + ); let infer = Infer::new( client, validation, @@ -342,16 +513,33 @@ pub async fn run( max_concurrent_requests, ); + // Prometheus handler + let builder = PrometheusBuilder::new(); + let prom_handle = builder + .install_recorder() + .expect("failed to install metrics recorder"); + + // CORS layer + let allow_origin = allow_origin.unwrap_or(AllowOrigin::any()); + let cors_layer = CorsLayer::new() + .allow_methods([Method::GET, Method::POST]) + .allow_headers([http::header::CONTENT_TYPE]) + .allow_origin(allow_origin); + // Create router let app = Router::new() .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi())) - .route("/", post(generate)) + .route("/", post(compat_generate)) .route("/generate", post(generate)) .route("/generate_stream", post(generate_stream)) .route("/", get(health)) .route("/health", get(health)) + .route("/metrics", get(metrics)) + .layer(Extension(compat_return_full_text)) .layer(Extension(infer)) - .layer(opentelemetry_tracing_layer()); + .layer(Extension(prom_handle)) + .layer(opentelemetry_tracing_layer()) + .layer(cors_layer); // Run server axum::Server::bind(&addr) @@ -415,6 +603,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) { status_code, Json(ErrorResponse { error: err.to_string(), + error_type: err.error_type().to_string(), }), ) } @@ -425,6 +614,7 @@ impl From<InferError> for Event { Event::default() .json_data(ErrorResponse { error: err.to_string(), + error_type: err.error_type().to_string(), }) .unwrap() } diff --git a/router/src/validation.rs b/router/src/validation.rs index aa1c1d23..1c350caa 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -1,3 +1,4 @@ +use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput}; /// Payload validation logic use crate::{GenerateParameters, GenerateRequest}; use rand::rngs::ThreadRng; @@ -5,33 +6,44 @@ use rand::Rng; use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters}; use thiserror::Error; use tokenizers::tokenizer::Tokenizer; +use tokenizers::TruncationDirection; use tokio::sync::{mpsc, oneshot}; use tracing::{instrument, Span}; -const MAX_MAX_NEW_TOKENS: u32 = 512; -const MAX_STOP_SEQUENCES: usize = 4; - /// Validation #[derive(Debug, Clone)] pub struct Validation { + /// maximum value for the best_of parameter + #[allow(dead_code)] + max_best_of: usize, /// Channel to communicate with the background validation task - sender: mpsc::Sender<ValidationRequest>, + sender: mpsc::UnboundedSender<ValidationRequest>, } impl Validation { - pub(crate) fn new(workers: usize, tokenizer: Tokenizer, max_input_length: usize) -> Self { + pub(crate) fn new( + 
workers: usize, + tokenizer: Tokenizer, + max_best_of: usize, + max_stop_sequences: usize, + max_input_length: usize, + max_total_tokens: usize, + ) -> Self { // Create channel - let (validation_sender, validation_receiver) = mpsc::channel(128); + let (validation_sender, validation_receiver) = mpsc::unbounded_channel(); // Launch background validation task tokio::spawn(validation_task( workers, tokenizer, + max_stop_sequences, max_input_length, + max_total_tokens, validation_receiver, )); Self { + max_best_of, sender: validation_sender, } } @@ -48,12 +60,25 @@ impl Validation { // Unwrap is safe here self.sender .send((request, sender, Span::current())) - .await .unwrap(); // Await on response channel // Unwrap is safe here receiver.await.unwrap() } + + /// Validate the best_of parameter + #[instrument(skip_all)] + pub(crate) fn validate_best_of(&self, best_of: usize) -> Result<usize, ValidationError> { + if self.max_best_of == 1 && best_of != 1 { + return Err(ValidationError::BestOfDisabled); + } + + if best_of > self.max_best_of { + return Err(ValidationError::BestOf(self.max_best_of, best_of)); + } + + Ok(best_of) + } } /// Validation task @@ -61,8 +86,10 @@ impl Validation { async fn validation_task( workers: usize, tokenizer: Tokenizer, + max_stop_sequences: usize, max_input_length: usize, - mut receiver: mpsc::Receiver<ValidationRequest>, + max_total_tokens: usize, + mut receiver: mpsc::UnboundedReceiver<ValidationRequest>, ) { let mut workers_senders = Vec::with_capacity(workers); @@ -75,7 +102,13 @@ async fn validation_task( // Spawn worker tokio::task::spawn_blocking(move || { - validation_worker(tokenizer_clone, max_input_length, worker_receiver) + validation_worker( + tokenizer_clone, + max_stop_sequences, + max_input_length, + max_total_tokens, + worker_receiver, + ) }); } @@ -95,7 +128,9 @@ async fn validation_task( /// the tokenizer fn validation_worker( tokenizer: Tokenizer, + max_stop_sequences: usize, max_input_length: usize, + max_total_tokens: usize, mut receiver: mpsc::Receiver<ValidationRequest>, ) { // Seed rng @@ -106,7 +141,16 @@ fn validation_worker( parent_span.in_scope(|| { response_tx .send( - validate(request, &tokenizer, max_input_length, &mut rng).map_err(|err| { + validate( + request, + &tokenizer, + max_stop_sequences, + max_input_length, + max_total_tokens, + &mut rng, + ) + .map_err(|err| { + metrics::increment_counter!("tgi_request_failure", "err" => "validation"); tracing::error!("{err}"); err }), @@ -119,21 +163,39 @@ fn validation_worker( fn validate( request: GenerateRequest, tokenizer: &Tokenizer, + max_stop_sequences: usize, max_input_length: usize, + max_total_tokens: usize, rng: &mut ThreadRng, ) -> Result<ValidGenerateRequest, ValidationError> { let GenerateParameters { + best_of, temperature, repetition_penalty, top_k, top_p, + typical_p, do_sample, max_new_tokens, stop: stop_sequences, + truncate, seed, + watermark, .. 
} = request.parameters; + // sampling must be true when best_of > 1 + let best_of = best_of.unwrap_or(1); + let sampling = do_sample + || temperature.is_some() + || top_k.is_some() + || top_p.is_some() + || typical_p.is_some(); + + if best_of > 1 && !sampling { + return Err(BestOfSampling); + } + let temperature = temperature.unwrap_or(1.0); if temperature <= 0.0 { return Err(ValidationError::Temperature); @@ -144,30 +206,42 @@ fn validate( return Err(ValidationError::RepetitionPenalty); } - let top_p = top_p.unwrap_or(1.0); - if top_p <= 0.0 || top_p > 1.0 { - return Err(ValidationError::TopP); - } - - // Different because the proto default value is 0 while it is not a valid value + // Different because the proto default value is not a valid value // for the user - let top_k: u32 = match top_k { - None => Ok(0), - Some(top_k) => { - if top_k <= 0 { + let top_p = top_p + .map(|value| { + if value <= 0.0 || value >= 1.0 { + return Err(ValidationError::TopP); + } + Ok(value) + }) + .unwrap_or(Ok(1.0))?; + + let typical_p = typical_p + .map(|value| { + if value <= 0.0 || value >= 1.0 { + return Err(ValidationError::TypicalP); + } + Ok(value) + }) + .unwrap_or(Ok(1.0))?; + + let top_k: u32 = top_k + .map(|value| { + if value <= 0 { return Err(ValidationError::TopK); } - Ok(top_k as u32) - } - }?; + Ok(value as u32) + }) + .unwrap_or(Ok(0))?; - if max_new_tokens == 0 || max_new_tokens > MAX_MAX_NEW_TOKENS { - return Err(ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS)); + if max_new_tokens == 0 { + return Err(ValidationError::MaxNewTokens); } - if stop_sequences.len() > MAX_STOP_SEQUENCES { + if stop_sequences.len() > max_stop_sequences { return Err(ValidationError::StopSequence( - MAX_STOP_SEQUENCES, + max_stop_sequences, stop_sequences.len(), )); } @@ -175,41 +249,82 @@ fn validate( // If seed is None, assign a random one let seed = match seed { None => rng.gen(), - Some(seed) => seed, + Some(seed) => { + if best_of > 1 { + return Err(BestOfSeed); + } + seed + } }; - // Get the number of tokens in the input - match tokenizer.encode(request.inputs.clone(), true) { - Ok(encoding) => { - let input_length = encoding.len(); - - if input_length > max_input_length { - Err(ValidationError::InputLength(input_length, max_input_length)) - } else { - // Return ValidGenerateRequest - let parameters = NextTokenChooserParameters { - temperature, - repetition_penalty, - top_k, - top_p, - do_sample, - seed, - }; - let stopping_parameters = StoppingCriteriaParameters { - max_new_tokens, - stop_sequences, - }; - - Ok(ValidGenerateRequest { - inputs: request.inputs, - input_length: input_length as u32, - parameters, - stopping_parameters, - }) - } - } - Err(err) => Err(ValidationError::Tokenizer(err.to_string())), + // Check if inputs is empty + if request.inputs.is_empty() { + return Err(EmptyInput); } + + // Check if truncate is strictly positive and less than max_input_length + let truncate = truncate + .map(|value| { + if value == 0 || value > max_input_length { + return Err(ValidationError::Truncate(max_input_length, value)); + } + Ok(Some(value)) + }) + .unwrap_or(Ok(None))?; + + // Get the number of tokens in the input + let mut encoding = tokenizer + .encode(request.inputs.clone(), true) + .map_err(|err| ValidationError::Tokenizer(err.to_string()))?; + + let (inputs, input_length) = if let Some(truncate) = truncate { + // truncate encoding and decode new inputs + encoding.truncate(truncate, 0, TruncationDirection::Left); + let inputs = tokenizer + .decode(Vec::from(encoding.get_ids()), false) + 
.map_err(|err| ValidationError::Tokenizer(err.to_string()))?; + (inputs, encoding.len()) + } else { + (request.inputs, encoding.len()) + }; + + if input_length > max_input_length { + return Err(ValidationError::InputLength(max_input_length, input_length)); + } + + let total_tokens = input_length + max_new_tokens as usize; + if total_tokens > max_total_tokens { + return Err(ValidationError::MaxTotalTokens( + max_total_tokens, + input_length, + max_new_tokens, + )); + } + + // Return ValidGenerateRequest + let parameters = NextTokenChooserParameters { + temperature, + repetition_penalty, + top_k, + top_p, + typical_p, + do_sample, + seed, + watermark, + }; + let stopping_parameters = StoppingCriteriaParameters { + max_new_tokens, + stop_sequences, + }; + + metrics::histogram!("tgi_request_input_length", input_length as f64); + metrics::histogram!("tgi_request_max_new_tokens", max_new_tokens as f64); + + Ok(ValidGenerateRequest { + inputs, + parameters, + stopping_parameters, + }) } type ValidationRequest = ( @@ -221,26 +336,43 @@ type ValidationRequest = ( #[derive(Debug)] pub(crate) struct ValidGenerateRequest { pub inputs: String, - pub input_length: u32, pub parameters: NextTokenChooserParameters, pub stopping_parameters: StoppingCriteriaParameters, } #[derive(Error, Debug)] pub enum ValidationError { - #[error("temperature must be strictly positive")] + #[error("`best_of` must be > 0 and <= {0}. Given: {1}")] + BestOf(usize, usize), + #[error("`best_of` != 1 is not allowed for this endpoint")] + BestOfDisabled, + #[error("you must use sampling when `best_of` is > 1")] + BestOfSampling, + #[error("`seed` must not be set when `best_of` > 1")] + BestOfSeed, + #[error("`best_of` != 1 is not supported when streaming tokens")] + BestOfStream, + #[error("`temperature` must be strictly positive")] Temperature, - #[error("repetition_penalty must be strictly positive")] + #[error("`repetition_penalty` must be strictly positive")] RepetitionPenalty, - #[error("top_p must be > 0.0 and <= 1.0")] + #[error("`top_p` must be > 0.0 and < 1.0")] TopP, - #[error("top_k must be strictly positive")] + #[error("`top_k` must be strictly positive")] TopK, - #[error("max_new_tokens must be strictly positive and <= {0}")] - MaxNewTokens(u32), - #[error("inputs must have less than {1} tokens. Given: {0}")] + #[error("`truncate` must be strictly positive and less than {0}. Given: {1}")] + Truncate(usize, usize), + #[error("`typical_p` must be > 0.0 and < 1.0")] + TypicalP, + #[error("`max_new_tokens` must be strictly positive")] + MaxNewTokens, + #[error("`inputs` tokens + `max_new_tokens` must be <= {0}. Given: {1} `inputs` tokens and {2} `max_new_tokens`")] + MaxTotalTokens(usize, usize, u32), + #[error("`inputs` must have less than {0} tokens. Given: {1}")] InputLength(usize, usize), - #[error("stop supports up to {0} stop sequences. Given: {1}")] + #[error("`inputs` cannot be empty")] + EmptyInput, + #[error("`stop` supports up to {0} stop sequences. 
Given: {1}")] StopSequence(usize, usize), #[error("tokenizer error {0}")] Tokenizer(String), diff --git a/server/.gitignore b/server/.gitignore index 5758ba92..aef74bb4 100644 --- a/server/.gitignore +++ b/server/.gitignore @@ -1,7 +1,7 @@ # Byte-compiled / optimized / DLL files __pycache__/ -text_generation/__pycache__/ -text_generation/pb/__pycache__/ +text_generation_server/__pycache__/ +text_generation_server/pb/__pycache__/ *.py[cod] *$py.class diff --git a/server/Makefile b/server/Makefile index 74ce5144..e8b0364e 100644 --- a/server/Makefile +++ b/server/Makefile @@ -1,20 +1,22 @@ +transformers_commit := 2b57aa18da658e7d2f42ef6bd5b56751af582fef + gen-server: # Compile protos pip install grpcio-tools==1.51.1 --no-cache-dir - mkdir text_generation/pb || true - python -m grpc_tools.protoc -I../proto --python_out=text_generation/pb --grpc_python_out=text_generation/pb ../proto/generate.proto - find text_generation/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \; - touch text_generation/pb/__init__.py + mkdir text_generation_server/pb || true + python -m grpc_tools.protoc -I../proto --python_out=text_generation_server/pb --grpc_python_out=text_generation_server/pb ../proto/generate.proto + find text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \; + touch text_generation_server/pb/__init__.py install-transformers: # Install specific version of transformers with custom cuda kernels pip uninstall transformers -y || true rm -rf transformers || true - rm -rf transformers-text_generation_inference || true - curl -L -O https://github.com/OlivierDehaene/transformers/archive/refs/heads/text_generation_inference.zip - unzip text_generation_inference.zip - rm text_generation_inference.zip - mv transformers-text_generation_inference transformers + rm -rf transformers-$(transformers_commit) || true + curl -L -O https://github.com/OlivierDehaene/transformers/archive/$(transformers_commit).zip + unzip $(transformers_commit).zip + rm $(transformers_commit).zip + mv transformers-$(transformers_commit) transformers cd transformers && python setup.py install install-torch: @@ -26,4 +28,4 @@ install: gen-server install-torch install-transformers pip install -e . 
--no-cache-dir run-dev: - SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation/cli.py serve bigscience/bloom-560m --sharded \ No newline at end of file + SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded \ No newline at end of file diff --git a/server/poetry.lock b/server/poetry.lock index 0e0655cb..89ad31e9 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -145,30 +145,30 @@ testing = ["protobuf (>=3.6.0)"] [[package]] name = "grpcio" -version = "1.51.1" +version = "1.51.3" description = "HTTP/2-based RPC framework" category = "main" optional = false python-versions = ">=3.7" [package.extras] -protobuf = ["grpcio-tools (>=1.51.1)"] +protobuf = ["grpcio-tools (>=1.51.3)"] [[package]] name = "grpcio-reflection" -version = "1.51.1" +version = "1.51.3" description = "Standard Protobuf Reflection Service for gRPC" category = "main" optional = false python-versions = ">=3.6" [package.dependencies] -grpcio = ">=1.51.1" +grpcio = ">=1.51.3" protobuf = ">=4.21.6" [[package]] name = "grpcio-status" -version = "1.51.1" +version = "1.51.3" description = "Status proto mapping for gRPC" category = "main" optional = false @@ -176,22 +176,30 @@ python-versions = ">=3.6" [package.dependencies] googleapis-common-protos = ">=1.5.5" -grpcio = ">=1.51.1" +grpcio = ">=1.51.3" protobuf = ">=4.21.6" [[package]] name = "grpcio-tools" -version = "1.51.1" +version = "1.51.3" description = "Protobuf code generator for gRPC" category = "dev" optional = false python-versions = ">=3.7" [package.dependencies] -grpcio = ">=1.51.1" +grpcio = ">=1.51.3" protobuf = ">=4.21.6,<5.0dev" setuptools = "*" +[[package]] +name = "hf-transfer" +version = "0.1.2" +description = "" +category = "main" +optional = false +python-versions = ">=3.7" + [[package]] name = "idna" version = "3.4" @@ -428,7 +436,7 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "protobuf" -version = "4.21.12" +version = "4.22.0" description = "" category = "main" optional = false @@ -511,7 +519,7 @@ torch = ["torch"] [[package]] name = "setuptools" -version = "67.2.0" +version = "67.4.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" category = "main" optional = false @@ -567,7 +575,7 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6. [[package]] name = "typing-extensions" -version = "4.4.0" +version = "4.5.0" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false @@ -610,7 +618,7 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [[package]] name = "wrapt" -version = "1.14.1" +version = "1.15.0" description = "Module for decorators, wrappers and monkey patching." 
category = "main" optional = false @@ -622,7 +630,7 @@ bnb = ["bitsandbytes"] [metadata] lock-version = "1.1" python-versions = "^3.9" -content-hash = "f3cab6881b52045770a90ec9be7415a0ee499d9e980892d544f68073700cf321" +content-hash = "521dc9f3c283dc56f7d2e2f96759919ff27ab49ffd3ae7cd26317b209e7fa98d" [metadata.files] accelerate = [ @@ -760,106 +768,127 @@ grpc-interceptor = [ {file = "grpc_interceptor-0.15.0-py3-none-any.whl", hash = "sha256:63e390162e64df96c39c40508eb697def76a7cafac32a7eaf9272093eec1109e"}, ] grpcio = [ - {file = "grpcio-1.51.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:cc2bece1737b44d878cc1510ea04469a8073dbbcdd762175168937ae4742dfb3"}, - {file = "grpcio-1.51.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:e223a9793522680beae44671b9ed8f6d25bbe5ddf8887e66aebad5e0686049ef"}, - {file = "grpcio-1.51.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:24ac1154c4b2ab4a0c5326a76161547e70664cd2c39ba75f00fc8a2170964ea2"}, - {file = "grpcio-1.51.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4ef09f8997c4be5f3504cefa6b5c6cc3cf648274ce3cede84d4342a35d76db6"}, - {file = "grpcio-1.51.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8a0b77e992c64880e6efbe0086fe54dfc0bbd56f72a92d9e48264dcd2a3db98"}, - {file = "grpcio-1.51.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:eacad297ea60c72dd280d3353d93fb1dcca952ec11de6bb3c49d12a572ba31dd"}, - {file = "grpcio-1.51.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:16c71740640ba3a882f50b01bf58154681d44b51f09a5728180a8fdc66c67bd5"}, - {file = "grpcio-1.51.1-cp310-cp310-win32.whl", hash = "sha256:29cb97d41a4ead83b7bcad23bdb25bdd170b1e2cba16db6d3acbb090bc2de43c"}, - {file = "grpcio-1.51.1-cp310-cp310-win_amd64.whl", hash = "sha256:9ff42c5620b4e4530609e11afefa4a62ca91fa0abb045a8957e509ef84e54d30"}, - {file = "grpcio-1.51.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:bc59f7ba87972ab236f8669d8ca7400f02a0eadf273ca00e02af64d588046f02"}, - {file = "grpcio-1.51.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:3c2b3842dcf870912da31a503454a33a697392f60c5e2697c91d133130c2c85d"}, - {file = "grpcio-1.51.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22b011674090594f1f3245960ced7386f6af35485a38901f8afee8ad01541dbd"}, - {file = "grpcio-1.51.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49d680356a975d9c66a678eb2dde192d5dc427a7994fb977363634e781614f7c"}, - {file = "grpcio-1.51.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:094e64236253590d9d4075665c77b329d707b6fca864dd62b144255e199b4f87"}, - {file = "grpcio-1.51.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:257478300735ce3c98d65a930bbda3db172bd4e00968ba743e6a1154ea6edf10"}, - {file = "grpcio-1.51.1-cp311-cp311-win32.whl", hash = "sha256:5a6ebcdef0ef12005d56d38be30f5156d1cb3373b52e96f147f4a24b0ddb3a9d"}, - {file = "grpcio-1.51.1-cp311-cp311-win_amd64.whl", hash = "sha256:3f9b0023c2c92bebd1be72cdfca23004ea748be1813a66d684d49d67d836adde"}, - {file = "grpcio-1.51.1-cp37-cp37m-linux_armv7l.whl", hash = "sha256:cd3baccea2bc5c38aeb14e5b00167bd4e2373a373a5e4d8d850bd193edad150c"}, - {file = "grpcio-1.51.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:17ec9b13cec4a286b9e606b48191e560ca2f3bbdf3986f91e480a95d1582e1a7"}, - {file = "grpcio-1.51.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:fbdbe9a849854fe484c00823f45b7baab159bdd4a46075302281998cb8719df5"}, - {file = "grpcio-1.51.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:31bb6bc7ff145e2771c9baf612f4b9ebbc9605ccdc5f3ff3d5553de7fc0e0d79"}, - {file = "grpcio-1.51.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e473525c28251558337b5c1ad3fa969511e42304524a4e404065e165b084c9e4"}, - {file = "grpcio-1.51.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6f0b89967ee11f2b654c23b27086d88ad7bf08c0b3c2a280362f28c3698b2896"}, - {file = "grpcio-1.51.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7942b32a291421460d6a07883033e392167d30724aa84987e6956cd15f1a21b9"}, - {file = "grpcio-1.51.1-cp37-cp37m-win32.whl", hash = "sha256:f96ace1540223f26fbe7c4ebbf8a98e3929a6aa0290c8033d12526847b291c0f"}, - {file = "grpcio-1.51.1-cp37-cp37m-win_amd64.whl", hash = "sha256:f1fec3abaf274cdb85bf3878167cfde5ad4a4d97c68421afda95174de85ba813"}, - {file = "grpcio-1.51.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:0e1a9e1b4a23808f1132aa35f968cd8e659f60af3ffd6fb00bcf9a65e7db279f"}, - {file = "grpcio-1.51.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:6df3b63538c362312bc5fa95fb965069c65c3ea91d7ce78ad9c47cab57226f54"}, - {file = "grpcio-1.51.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:172405ca6bdfedd6054c74c62085946e45ad4d9cec9f3c42b4c9a02546c4c7e9"}, - {file = "grpcio-1.51.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:506b9b7a4cede87d7219bfb31014d7b471cfc77157da9e820a737ec1ea4b0663"}, - {file = "grpcio-1.51.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0fb93051331acbb75b49a2a0fd9239c6ba9528f6bdc1dd400ad1cb66cf864292"}, - {file = "grpcio-1.51.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5dca372268c6ab6372d37d6b9f9343e7e5b4bc09779f819f9470cd88b2ece3c3"}, - {file = "grpcio-1.51.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:471d39d3370ca923a316d49c8aac66356cea708a11e647e3bdc3d0b5de4f0a40"}, - {file = "grpcio-1.51.1-cp38-cp38-win32.whl", hash = "sha256:75e29a90dc319f0ad4d87ba6d20083615a00d8276b51512e04ad7452b5c23b04"}, - {file = "grpcio-1.51.1-cp38-cp38-win_amd64.whl", hash = "sha256:f1158bccbb919da42544a4d3af5d9296a3358539ffa01018307337365a9a0c64"}, - {file = "grpcio-1.51.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:59dffade859f157bcc55243714d57b286da6ae16469bf1ac0614d281b5f49b67"}, - {file = "grpcio-1.51.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:dad6533411d033b77f5369eafe87af8583178efd4039c41d7515d3336c53b4f1"}, - {file = "grpcio-1.51.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:4c4423ea38a7825b8fed8934d6d9aeebdf646c97e3c608c3b0bcf23616f33877"}, - {file = "grpcio-1.51.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0dc5354e38e5adf2498312f7241b14c7ce3484eefa0082db4297189dcbe272e6"}, - {file = "grpcio-1.51.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:97d67983189e2e45550eac194d6234fc38b8c3b5396c153821f2d906ed46e0ce"}, - {file = "grpcio-1.51.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:538d981818e49b6ed1e9c8d5e5adf29f71c4e334e7d459bf47e9b7abb3c30e09"}, - {file = "grpcio-1.51.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9235dcd5144a83f9ca6f431bd0eccc46b90e2c22fe27b7f7d77cabb2fb515595"}, - {file = "grpcio-1.51.1-cp39-cp39-win32.whl", hash = "sha256:aacb54f7789ede5cbf1d007637f792d3e87f1c9841f57dd51abf89337d1b8472"}, - {file = "grpcio-1.51.1-cp39-cp39-win_amd64.whl", hash = "sha256:2b170eaf51518275c9b6b22ccb59450537c5a8555326fd96ff7391b5dd75303c"}, - {file = "grpcio-1.51.1.tar.gz", hash = "sha256:e6dfc2b6567b1c261739b43d9c59d201c1b89e017afd9e684d85aa7a186c9f7a"}, + {file = 
"grpcio-1.51.3-cp310-cp310-linux_armv7l.whl", hash = "sha256:f601aaeae18dab81930fb8d4f916b0da21e89bb4b5f7367ef793f46b4a76b7b0"}, + {file = "grpcio-1.51.3-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:eef0450a4b5ed11feab639bf3eb1b6e23d0efa9b911bf7b06fb60e14f5f8a585"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82b0ad8ac825d4bb31bff9f638557c045f4a6d824d84b21e893968286f88246b"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3667c06e37d6cd461afdd51cefe6537702f3d1dc5ff4cac07e88d8b4795dc16f"}, + {file = "grpcio-1.51.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3709048fe0aa23dda09b3e69849a12055790171dab9e399a72ea8f9dfbf9ac80"}, + {file = "grpcio-1.51.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:200d69857f9910f7458b39b9bcf83ee4a180591b40146ba9e49314e3a7419313"}, + {file = "grpcio-1.51.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cd9a5e68e79c5f031500e67793048a90209711e0854a9ddee8a3ce51728de4e5"}, + {file = "grpcio-1.51.3-cp310-cp310-win32.whl", hash = "sha256:6604f614016127ae10969176bbf12eb0e03d2fb3d643f050b3b69e160d144fb4"}, + {file = "grpcio-1.51.3-cp310-cp310-win_amd64.whl", hash = "sha256:e95c7ccd4c5807adef1602005513bf7c7d14e5a41daebcf9d8d30d8bf51b8f81"}, + {file = "grpcio-1.51.3-cp311-cp311-linux_armv7l.whl", hash = "sha256:5e77ee138100f0bb55cbd147840f87ee6241dbd25f09ea7cd8afe7efff323449"}, + {file = "grpcio-1.51.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:68a7514b754e38e8de9075f7bb4dee919919515ec68628c43a894027e40ddec4"}, + {file = "grpcio-1.51.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c1b9f8afa62ff265d86a4747a2990ec5a96e4efce5d5888f245a682d66eca47"}, + {file = "grpcio-1.51.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8de30f0b417744288cec65ec8cf84b8a57995cf7f1e84ccad2704d93f05d0aae"}, + {file = "grpcio-1.51.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b69c7adc7ed60da1cb1b502853db61f453fc745f940cbcc25eb97c99965d8f41"}, + {file = "grpcio-1.51.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d81528ffe0e973dc840ec73a4132fd18b8203ad129d7410155d951a0a7e4f5d0"}, + {file = "grpcio-1.51.3-cp311-cp311-win32.whl", hash = "sha256:040eb421613b57c696063abde405916dd830203c184c9000fc8c3b3b3c950325"}, + {file = "grpcio-1.51.3-cp311-cp311-win_amd64.whl", hash = "sha256:2a8e17286c4240137d933b8ca506465472248b4ce0fe46f3404459e708b65b68"}, + {file = "grpcio-1.51.3-cp37-cp37m-linux_armv7l.whl", hash = "sha256:d5cd1389669a847555df54177b911d9ff6f17345b2a6f19388707b7a9f724c88"}, + {file = "grpcio-1.51.3-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:be1bf35ce82cdbcac14e39d5102d8de4079a1c1a6a06b68e41fcd9ef64f9dd28"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:5eed34994c095e2bf7194ffac7381c6068b057ef1e69f8f08db77771350a7566"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f9a7d88082b2a17ae7bd3c2354d13bab0453899e0851733f6afa6918373f476"}, + {file = "grpcio-1.51.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c8abbc5f837111e7bd619612eedc223c290b0903b952ce0c7b00840ea70f14"}, + {file = "grpcio-1.51.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:165b05af77e6aecb4210ae7663e25acf234ba78a7c1c157fa5f2efeb0d6ec53c"}, + {file = "grpcio-1.51.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:54e36c2ee304ff15f2bfbdc43d2b56c63331c52d818c364e5b5214e5bc2ad9f6"}, + {file = "grpcio-1.51.3-cp37-cp37m-win32.whl", hash = "sha256:cd0daac21d9ef5e033a5100c1d3aa055bbed28bfcf070b12d8058045c4e821b1"}, + {file = "grpcio-1.51.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2fdd6333ce96435408565a9dbbd446212cd5d62e4d26f6a3c0feb1e3c35f1cc8"}, + {file = "grpcio-1.51.3-cp38-cp38-linux_armv7l.whl", hash = "sha256:54b0c29bdd9a3b1e1b61443ab152f060fc719f1c083127ab08d03fac5efd51be"}, + {file = "grpcio-1.51.3-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:ffaaf7e93fcb437356b5a4b23bf36e8a3d0221399ff77fd057e4bc77776a24be"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:eafbe7501a3268d05f2e450e1ddaffb950d842a8620c13ec328b501d25d2e2c3"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:881ecb34feabf31c6b3b9bbbddd1a5b57e69f805041e5a2c6c562a28574f71c4"}, + {file = "grpcio-1.51.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e860a3222139b41d430939bbec2ec9c3f6c740938bf7a04471a9a8caaa965a2e"}, + {file = "grpcio-1.51.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:49ede0528e9dac7e8a9fe30b16c73b630ddd9a576bf4b675eb6b0c53ee5ca00f"}, + {file = "grpcio-1.51.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6972b009638b40a448d10e1bc18e2223143b8a7aa20d7def0d78dd4af4126d12"}, + {file = "grpcio-1.51.3-cp38-cp38-win32.whl", hash = "sha256:5694448256e3cdfe5bd358f1574a3f2f51afa20cc834713c4b9788d60b7cc646"}, + {file = "grpcio-1.51.3-cp38-cp38-win_amd64.whl", hash = "sha256:3ea4341efe603b049e8c9a5f13c696ca37fcdf8a23ca35f650428ad3606381d9"}, + {file = "grpcio-1.51.3-cp39-cp39-linux_armv7l.whl", hash = "sha256:6c677581ce129f5fa228b8f418cee10bd28dd449f3a544ea73c8ba590ee49d0b"}, + {file = "grpcio-1.51.3-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:30e09b5e0531685e176f49679b6a3b190762cc225f4565e55a899f5e14b3aa62"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:c831f31336e81243f85b6daff3e5e8a123302ce0ea1f2726ad752fd7a59f3aee"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2cd2e4cefb724cab1ba2df4b7535a9980531b9ec51b4dbb5f137a1f3a3754ef0"}, + {file = "grpcio-1.51.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7a0d0bf44438869d307f85a54f25a896ad6b4b0ca12370f76892ad732928d87"}, + {file = "grpcio-1.51.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:c02abd55409bfb293371554adf6a4401197ec2133dd97727c01180889014ba4d"}, + {file = "grpcio-1.51.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2f8ff75e61e1227ba7a3f16b2eadbcc11d0a54096d52ab75a6b88cfbe56f55d1"}, + {file = "grpcio-1.51.3-cp39-cp39-win32.whl", hash = "sha256:6c99a73a6260bdf844b2e5ddad02dcd530310f80e1fa72c300fa19c1c7496962"}, + {file = "grpcio-1.51.3-cp39-cp39-win_amd64.whl", hash = "sha256:22bdfac4f7f27acdd4da359b5e7e1973dc74bf1ed406729b07d0759fde2f064b"}, + {file = "grpcio-1.51.3.tar.gz", hash = "sha256:be7b2265b7527bb12109a7727581e274170766d5b3c9258d4e466f4872522d7a"}, ] grpcio-reflection = [ - {file = "grpcio-reflection-1.51.1.tar.gz", hash = "sha256:c07a93c0c36ef88fe475744289863b4787005eff4de0cc04213ecad718b01aae"}, - {file = "grpcio_reflection-1.51.1-py3-none-any.whl", hash = "sha256:b70af764a83e42a44f65df1edb232e972ab69e72bc7fbbad481e66c29a9d8cb8"}, + {file = "grpcio-reflection-1.51.3.tar.gz", hash = "sha256:5adca16f0a6cd403efa3b5f8f8a493eea6a37dee9473b178fad0a60efa68bc67"}, + {file = 
"grpcio_reflection-1.51.3-py3-none-any.whl", hash = "sha256:52b037f831908468afc89c60e591d0a2bbce24a393d908c44a6d53091e90fc41"}, ] grpcio-status = [ - {file = "grpcio-status-1.51.1.tar.gz", hash = "sha256:ac2617a3095935ebd785e2228958f24b10a0d527a0c9eb5a0863c784f648a816"}, - {file = "grpcio_status-1.51.1-py3-none-any.whl", hash = "sha256:a52cbdc4b18f325bfc13d319ae7c7ae7a0fee07f3d9a005504d6097896d7a495"}, + {file = "grpcio-status-1.51.3.tar.gz", hash = "sha256:71792c550356ba94e162c70818719ae6d67d960bdd03a9db5ff68faba2927f6c"}, + {file = "grpcio_status-1.51.3-py3-none-any.whl", hash = "sha256:d68d0956c16b6ea466f13c27075f126ef2cd8f0f97527d70056c64b0084357e3"}, ] grpcio-tools = [ - {file = "grpcio-tools-1.51.1.tar.gz", hash = "sha256:8e62d23d3fed9d4f81738f98dd193dbd2e21aed4a8f0dd715e75b5439e649727"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:ecf1494cb695afead36995534f787761ee33fb9e116b23030113a37fe6057a83"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:16b8b915625dc6eb2ea7efdfb06f1fae44a9066c9016453a2ca120c034f33090"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:d5e033c04b416afcddd5231b3ff94a34fb5d26fba2416eb940e69b05f22cfd25"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a218f64e667f3332b74080bdc5440aaf0fa6700ae07a0b54ecf085aaef2aa9f"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7b186183515ad6b8584ffe4bd820b72b00f6e7d121fb1c36294edeea9092313"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ccd37165d7a3e93f460096a2eb62b7a9c1ebe5c424eaee42d8e92740d0c8f6bc"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:531586c5598a99658249f3c5e92826d6d2bb117abd6ffc88527d1e1d9eaef924"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-win32.whl", hash = "sha256:392ad4cd004f7b843cf7d916d9a15b2d6585965bfef235be1c88d8f8649777e5"}, - {file = "grpcio_tools-1.51.1-cp310-cp310-win_amd64.whl", hash = "sha256:14e82c2b3ee7e300611c2c729d411b3b911e4cca5f4ec14787457a2fb72ff9d4"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:2281180490c475d09b7aa05dabafa5e09de9902176931e7295113f636c2b5360"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:c4649af7f5d9553975ee66b6bfae20a84be779f13e163fa835e782961895e63c"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f06bb0753b7cecbff154b523cfb8f45dee2c31b0a4c72bed7da44c57f1cba113"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a671466158ed74c07ee070fb940ed783acf59ba6e6e53cb4de8fd63819c6c7f"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:048793747339f327ea091d8f022c6756d89713d8080dffde5ce7380cc348ea8e"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f6caf36e7752728329a28f93afec7c4ec9015fc1c6e4460bd1eb0f3737e1c55a"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-win32.whl", hash = "sha256:67b304282cad38642587ebae68617e450e1ad4fa1c0c8b19e9e30274dbb32716"}, - {file = "grpcio_tools-1.51.1-cp311-cp311-win_amd64.whl", hash = "sha256:674b340f2f7bb2adbc3f15144bd37ce5ea83239f78b68dbbd0ea3cba00107e2b"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-linux_armv7l.whl", hash = 
"sha256:055819992ddd30c642a7fd6f344a03747be3afa95cb910f8a2e5efaabd41cde5"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:4e3249a2ec435b3b972610c66c8a714c188844500d564c910f57a2771dc61978"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:794f26a09b70f4f101df5cf54c6c12dc1b65747ab1dee5bda02c2991389ade56"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4957f1ffa16598aa5379505fcbaeb47d65693a46b0817f4ee61db76707092aeb"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9906fb6bf6d9c30c23d85153f12d130f44325afe8f9ebe58aa7a6c82ecade9d8"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87bc5f3e3698c65907d397003c64d25c3ea84e3d6aa46dac133bd98bf66835ee"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a66b3a5d18a7615f0f828b72e2d2935751459c89cc4725e56bdfb3d2cd93281f"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-win32.whl", hash = "sha256:566809d9942e78821b279af70f3cf159a328127f9f3d5fee8d83ad8b2d27b2fe"}, - {file = "grpcio_tools-1.51.1-cp37-cp37m-win_amd64.whl", hash = "sha256:aab24a342642329de38139cb26f8492882ca0d8551bb87f6530bcc613945a0d0"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:6b83d7fc2597c6d392c225177d1fbbcff74900f8cc40b33236987fd1ff841330"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:79c06d2577cb4d977922bbf01234de3b20f73d1784d3cbe3179deee1bdb9a60b"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:e9abc03d67793b1bf33dc766caa69a3333f9db029869ba6e8fc6cd9c251c0080"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64d8ad369417759f5fdb8ffb7cbd6374fecc06ab51c9a226dee9bbd7d311c3b5"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de51a0a71845b854f6a5967756c893c96bd03e37f39e5dce87b4f409dac36ee2"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:9dfe6c12b0e2c07f6a4a91a9912ef4e5bd007672533891a44e6f433ffbf7c3b1"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:27113b354f7587684eb55125733e6e5be1f489458abfe12344dabd918d8dcc54"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-win32.whl", hash = "sha256:98777b5031f1b3c58b688815ffa83435c103b2152c26eb144f80f4a4bb34addb"}, - {file = "grpcio_tools-1.51.1-cp38-cp38-win_amd64.whl", hash = "sha256:1c44b57a6770b78a1eafe355878ff1ec59a2fa07455a2cbd522c071eedae04d4"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:49624394805568acd7d767dea5a00d970fca5ad8f395fe0161eeea0de5133eba"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:6d6626a6e4dbe843df96dc8c08dd244d2191a75324f54bfa4ebaa3e76b0b1958"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:b4fb8ed6d29f2d6cf03ef99ffaad635bbc132a59be77013691392fe557e67144"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8cc862a1ad30f94528d66cc6f95fb9e659005e568313e54a23550535b649573"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e72a30be1746ea0749a8486d0ca0120c0b2757fe84fc246a5144b1ef66d7b89"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-musllinux_1_1_i686.whl", hash = 
"sha256:331a897306adeec3c67470431ea8d8b4972b689d32966f94506d91f4dac20952"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f336ad9be661d92fa45940e74e8ff3d78e67ebe9b4f7ea8774b2d680c17aeb6c"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-win32.whl", hash = "sha256:40ef70e8c5d0310dedff9af502b520b4c7e215bce94094527fb959150a0c594a"}, - {file = "grpcio_tools-1.51.1-cp39-cp39-win_amd64.whl", hash = "sha256:15b8acf4eaa0ebe37e2f69108de49efd935b7abe9c7e58ba737490b99906aa76"}, + {file = "grpcio-tools-1.51.3.tar.gz", hash = "sha256:4fea28e3dd31871579a57058796a78093c75b74b74e9de2f2b7a7fd9a443d403"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-linux_armv7l.whl", hash = "sha256:779ac1ad2258b8debaa45595bfb3814806ed8880e3ea7f194e551d76a6255969"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:83bf605fe2b3591d3c8a78646f37c72c5832c4dd84b5f92405c17cb10b136be6"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:35f885c5afd8e6a77d320f5a9624b439a93f9be2b87fa7b7948c1ad7b2ba0894"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:253b639fb79a4d28ce494ae40e5695bf1e2cb4a05f205fc433c46b2049ab4d99"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c6b145587d6062e2335f0b3286501dd6853a1ea50bd466a913351b7c48e5f20"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:046c0b1e372d4acf552aa0c8f5e830f019d67b75f25aeb0968d15fbdd3eaabd3"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:efc90b0287908c46281eb61933acaa1b96a575d0160fc98b5c64b9dec46f60d1"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-win32.whl", hash = "sha256:8e9df40db7a0edd403b539cc142d6114270e35debf723a5b4a7a93d5c30fffc0"}, + {file = "grpcio_tools-1.51.3-cp310-cp310-win_amd64.whl", hash = "sha256:077adaee431c2b040dd77923964577087c32e828908e8fa2e53f8e003ad408c9"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-linux_armv7l.whl", hash = "sha256:b50f9b8a6482a90c1a41e731a879a130f7dea267065d0a06f47c9160ce5d01c3"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:89a68adcb4238aba69f3a364ac02c9a46e55b9e3fd8af1c6f384079abfa9347c"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d177da43e7f6fde6715df4a3015ae13158166bc2845ac7f9cfb526eafb41b8"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:793f9edef82f600a3324f8a3d8cd8318a8d02f28fb54f8236cbb35ce0928d186"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:f7583735542ced7d30baec6cc21bffeaffcec1523bf807e8f8f0047113b6d30a"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f2df233a3e7db23d9b516cb5e2bfe029465f40a72978bee0584e44e7860ea73f"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-win32.whl", hash = "sha256:7427939455735fbf2ea88c37f1585c9c8b809eec7b447642f34465eb4d26020b"}, + {file = "grpcio_tools-1.51.3-cp311-cp311-win_amd64.whl", hash = "sha256:ba76d15fd149b575170fa32a1f6a9ff2b38ff9db223229a8ad6f53450a452688"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-linux_armv7l.whl", hash = "sha256:d2212c682529263b3c9e903092d0ccbb9fc6afba820e4c2fa52c2c27720cdcae"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:405656b3cf9639427e6c30a795570cba4a7c06b88a3145866f7d2c05b7e048b4"}, + {file = 
"grpcio_tools-1.51.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:3c445a064b2ef3d3475e26e2add8ddb4ac2933741ecddf71d5b071a3ad078db4"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7b3374f4a6579c58d16a5fab2e6b4e9bb8625a034a7f4cd6024f4d1cc12f2a0"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e8df08b65f9379c3f103147b29542b0141ca84e77d0eee9114ca5f9b3f0d23"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2fade12de08923b350475ca16d0d0bd68578c30fce89147aa0f94ef5759bc5a9"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d4ffb6325ed489065dbdca764cf37c3a29376bc657874116c9af788d7a0d2ee4"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-win32.whl", hash = "sha256:f8d17271fc58ed3503dd571c79917e126deca51f85f093770a9606e806aac9dc"}, + {file = "grpcio_tools-1.51.3-cp37-cp37m-win_amd64.whl", hash = "sha256:ef849687c7f2bd7f3277edc7c7cafc7042823d0fb078e3c01c861eb0c96ed181"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-linux_armv7l.whl", hash = "sha256:7fd18d8d211fbfd337fc12e5bdd57e62368f636addf901d290e68a39f1dfea38"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:233fc56f054424232e2086f444004413e33c699174ce6ee0e279c25227243fec"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:867fa1973fa8b0772077c15425f122f672a18b1c53709a8a2bff9d056db4c20e"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b486a99bdf2722e68a9d59769389e2fb86878b6f293be5111f7678e364a0c359"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8bbf412c357999f88d87f421fd48b4b114fc037fec7bbaed0cb7620c24a5e44"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1166744c40821bb0aa605d2af2287fac367756f858a3d18f4c3d25bc0b92757b"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:781896c488e07b9463196045e6725e52d018cd7d0e1062d4ab1eee2647ca9170"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-win32.whl", hash = "sha256:35c1ee7c766eb586f04ba41fa7711eb847767eb277a1737998374ac57768f1f0"}, + {file = "grpcio_tools-1.51.3-cp38-cp38-win_amd64.whl", hash = "sha256:584b201fb39307dcb1affcf2647656a0e6244423ef1659cc6caa3ff85c5ae5c1"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-linux_armv7l.whl", hash = "sha256:e02231e21029f716a1d23a0b5e664fa243d147da33a3f55088a9529b860aa4ac"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:fbb742e10bd548031b8d80f7c28eb70c7c3a9850f8e99c98cd496f19a05f9fee"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:a836a72c657f751244cdb358c3461a89627e6d02654079d2450cfe361800428c"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb554408e0ec5ff5201013f268726d9eef8e5bd1fd4b4e09c46c0b4a9de8b64c"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:158c5bfe7e157fd9a944bde9f7dfe3b468416666e4fade77cd17caa3edc8bd81"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:715c792679224171c0584e9f235b921d76f8990deb38b0d1215d0469301d9cd9"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ece44f42b10e0bceb49235be1e361e1ee69afee7f938c82fb656a601a4a720e3"}, + {file = 
"grpcio_tools-1.51.3-cp39-cp39-win32.whl", hash = "sha256:980e632710ba05e04364c6f276e905d5d367437f1ce2265ce7b96b5c1eac5693"}, + {file = "grpcio_tools-1.51.3-cp39-cp39-win_amd64.whl", hash = "sha256:5f4c47b14e66f80365cd5667ecc2f7fb0eb91e02c4e54362041b758feaa00511"}, +] +hf-transfer = [ + {file = "hf_transfer-0.1.2-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:2b9189a4a460646ee135ee771f39c0f695d3d5bf08b7ff1dcfe374227520e994"}, + {file = "hf_transfer-0.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:654fcaba4e7084caa1e97430982ea968935a72916ee0f4afc60e356f89774099"}, + {file = "hf_transfer-0.1.2-cp310-none-win_amd64.whl", hash = "sha256:eb29e7b3707b5cac02e689c89111685ebcdaa3cebba02eb7ac1b0f076357da72"}, + {file = "hf_transfer-0.1.2-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:0bfca9bd84e925e978a0f157df488704c17a0b9ad240b2859262faba0c74cd40"}, + {file = "hf_transfer-0.1.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d00c5473b35227b2f113fd43ff13cbac9539f2e6779fa0680a887b0aac31c389"}, + {file = "hf_transfer-0.1.2-cp311-none-win_amd64.whl", hash = "sha256:1aaf5937aa433b7d09ce5bf60967ec22b7d3982957b00516a8dc2aaa66384372"}, + {file = "hf_transfer-0.1.2-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:b0aa760a55995ad59ea17e395babafdc56c4e664be0c2d2055664199dd913da1"}, + {file = "hf_transfer-0.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:889dd15e8472daf66e266eb056e31a485af3c35f95a483bb43489a0f6e44c359"}, + {file = "hf_transfer-0.1.2-cp37-none-win_amd64.whl", hash = "sha256:30df586e18ec8a8e67e3201b9038210d94cb3c03c1cbd97673b9c78ede227178"}, + {file = "hf_transfer-0.1.2-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:cc97eb97f929f96bed896cd3af9bbdf121c15ac6d63524b9fc9312fd2929099a"}, + {file = "hf_transfer-0.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:583c2c80210a60dafed9a81ba50c389878aee6c34b2dd375cd84522658f29ad8"}, + {file = "hf_transfer-0.1.2-cp38-none-win_amd64.whl", hash = "sha256:6dff58f50d1435b0346f31a32f1f9e2301986521c1d0b51e47a3c82b96d02156"}, + {file = "hf_transfer-0.1.2-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:d6db1a8f539133f7a893bb32721916fe72b4d2aa3eb7604581ba1f03b8167c90"}, + {file = "hf_transfer-0.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f284e3f775d215c9a8d3d1c6f6b1001b1e7990d73ae5fd9aea6c9bce9ea79285"}, + {file = "hf_transfer-0.1.2-cp39-none-win_amd64.whl", hash = "sha256:8625beabebc582eafc4141a5ecb9f1183b728d4f63767f01fdcf1e2fbafe6d43"}, + {file = "hf_transfer-0.1.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:947dd1b8b22ac10723b2887ed4b5ef929f7d4dd850b0d66c0c6954a9a85afb06"}, + {file = "hf_transfer-0.1.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90a020f41dfae4629186c284888cd5adbebe402e2497a88351416ab93c7df9a8"}, + {file = "hf_transfer-0.1.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5eb89698746a29805bfc60126b9a008e6ba08a82ef9bb122a6544e84f748e8a4"}, + {file = "hf_transfer-0.1.2.tar.gz", hash = "sha256:6bf847f4c19c7d8d9f9bbb8a7ed52e1271bbf0c1bd920357db0c274ccc69f21d"}, ] idna = [ {file = "idna-3.4-py3-none-any.whl", hash = 
"sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, @@ -965,20 +994,19 @@ pluggy = [ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, ] protobuf = [ - {file = "protobuf-4.21.12-cp310-abi3-win32.whl", hash = "sha256:b135410244ebe777db80298297a97fbb4c862c881b4403b71bac9d4107d61fd1"}, - {file = "protobuf-4.21.12-cp310-abi3-win_amd64.whl", hash = "sha256:89f9149e4a0169cddfc44c74f230d7743002e3aa0b9472d8c28f0388102fc4c2"}, - {file = "protobuf-4.21.12-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:299ea899484ee6f44604deb71f424234f654606b983cb496ea2a53e3c63ab791"}, - {file = "protobuf-4.21.12-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:d1736130bce8cf131ac7957fa26880ca19227d4ad68b4888b3be0dea1f95df97"}, - {file = "protobuf-4.21.12-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:78a28c9fa223998472886c77042e9b9afb6fe4242bd2a2a5aced88e3f4422aa7"}, - {file = "protobuf-4.21.12-cp37-cp37m-win32.whl", hash = "sha256:3d164928ff0727d97022957c2b849250ca0e64777ee31efd7d6de2e07c494717"}, - {file = "protobuf-4.21.12-cp37-cp37m-win_amd64.whl", hash = "sha256:f45460f9ee70a0ec1b6694c6e4e348ad2019275680bd68a1d9314b8c7e01e574"}, - {file = "protobuf-4.21.12-cp38-cp38-win32.whl", hash = "sha256:6ab80df09e3208f742c98443b6166bcb70d65f52cfeb67357d52032ea1ae9bec"}, - {file = "protobuf-4.21.12-cp38-cp38-win_amd64.whl", hash = "sha256:1f22ac0ca65bb70a876060d96d914dae09ac98d114294f77584b0d2644fa9c30"}, - {file = "protobuf-4.21.12-cp39-cp39-win32.whl", hash = "sha256:27f4d15021da6d2b706ddc3860fac0a5ddaba34ab679dc182b60a8bb4e1121cc"}, - {file = "protobuf-4.21.12-cp39-cp39-win_amd64.whl", hash = "sha256:237216c3326d46808a9f7c26fd1bd4b20015fb6867dc5d263a493ef9a539293b"}, - {file = "protobuf-4.21.12-py2.py3-none-any.whl", hash = "sha256:a53fd3f03e578553623272dc46ac2f189de23862e68565e83dde203d41b76fc5"}, - {file = "protobuf-4.21.12-py3-none-any.whl", hash = "sha256:b98d0148f84e3a3c569e19f52103ca1feacdac0d2df8d6533cf983d1fda28462"}, - {file = "protobuf-4.21.12.tar.gz", hash = "sha256:7cd532c4566d0e6feafecc1059d04c7915aec8e182d1cf7adee8b24ef1e2e6ab"}, + {file = "protobuf-4.22.0-cp310-abi3-win32.whl", hash = "sha256:b2fea9dc8e3c0f32c38124790ef16cba2ee0628fe2022a52e435e1117bfef9b1"}, + {file = "protobuf-4.22.0-cp310-abi3-win_amd64.whl", hash = "sha256:a33a273d21852f911b8bda47f39f4383fe7c061eb1814db2c76c9875c89c2491"}, + {file = "protobuf-4.22.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:e894e9ae603e963f0842498c4cd5d39c6a60f0d7e4c103df50ee939564298658"}, + {file = "protobuf-4.22.0-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:7c535d126e7dcc714105ab20b418c4fedbd28f8b8afc42b7350b1e317bbbcc71"}, + {file = "protobuf-4.22.0-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:86c3d20428b007537ba6792b475c0853bba7f66b1f60e610d913b77d94b486e4"}, + {file = "protobuf-4.22.0-cp37-cp37m-win32.whl", hash = "sha256:1669cb7524221a8e2d9008d0842453dbefdd0fcdd64d67672f657244867635fb"}, + {file = "protobuf-4.22.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ab4d043865dd04e6b09386981fe8f80b39a1e46139fb4a3c206229d6b9f36ff6"}, + {file = "protobuf-4.22.0-cp38-cp38-win32.whl", hash = "sha256:29288813aacaa302afa2381db1d6e0482165737b0afdf2811df5fa99185c457b"}, + {file = "protobuf-4.22.0-cp38-cp38-win_amd64.whl", hash = "sha256:e474b63bab0a2ea32a7b26a4d8eec59e33e709321e5e16fb66e766b61b82a95e"}, + {file = "protobuf-4.22.0-cp39-cp39-win32.whl", hash = "sha256:47d31bdf58222dd296976aa1646c68c6ee80b96d22e0a3c336c9174e253fd35e"}, + {file = 
"protobuf-4.22.0-cp39-cp39-win_amd64.whl", hash = "sha256:c27f371f0159feb70e6ea52ed7e768b3f3a4c5676c1900a7e51a24740381650e"}, + {file = "protobuf-4.22.0-py3-none-any.whl", hash = "sha256:c3325803095fb4c2a48649c321d2fbde59f8fbfcb9bfc7a86df27d112831c571"}, + {file = "protobuf-4.22.0.tar.gz", hash = "sha256:652d8dfece122a24d98eebfef30e31e455d300efa41999d1182e015984ac5930"}, ] psutil = [ {file = "psutil-5.9.4-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:c1ca331af862803a42677c120aff8a814a804e09832f166f226bfd22b56feee8"}, @@ -1089,8 +1117,8 @@ safetensors = [ {file = "safetensors-0.2.8.tar.gz", hash = "sha256:2720b20a6a38c799dca79bd76caeeac2f7df585a9d4f7d59fa7e28eff9ccb27f"}, ] setuptools = [ - {file = "setuptools-67.2.0-py3-none-any.whl", hash = "sha256:16ccf598aab3b506593c17378473978908a2734d7336755a8769b480906bec1c"}, - {file = "setuptools-67.2.0.tar.gz", hash = "sha256:b440ee5f7e607bb8c9de15259dba2583dd41a38879a7abc1d43a71c59524da48"}, + {file = "setuptools-67.4.0-py3-none-any.whl", hash = "sha256:f106dee1b506dee5102cc3f3e9e68137bbad6d47b616be7991714b0c62204251"}, + {file = "setuptools-67.4.0.tar.gz", hash = "sha256:e5fd0a713141a4a105412233c63dc4e17ba0090c8e8334594ac790ec97792330"}, ] tomli = [ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, @@ -1124,8 +1152,8 @@ typer = [ {file = "typer-0.6.1.tar.gz", hash = "sha256:2d5720a5e63f73eaf31edaa15f6ab87f35f0690f8ca233017d7d23d743a91d73"}, ] typing-extensions = [ - {file = "typing_extensions-4.4.0-py3-none-any.whl", hash = "sha256:16fa4864408f655d35ec496218b85f79b3437c829e93320c7c9215ccfd92489e"}, - {file = "typing_extensions-4.4.0.tar.gz", hash = "sha256:1511434bb92bf8dd198c12b1cc812e800d4181cfcb867674e0f8279cc93087aa"}, + {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, + {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, ] urllib3 = [ {file = "urllib3-1.26.14-py2.py3-none-any.whl", hash = "sha256:75edcdc2f7d85b137124a6c3c9fc3933cdeaa12ecb9a6a959f22797a0feca7e1"}, @@ -1140,68 +1168,79 @@ win32-setctime = [ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, ] wrapt = [ - {file = "wrapt-1.14.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:1b376b3f4896e7930f1f772ac4b064ac12598d1c38d04907e696cc4d794b43d3"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:903500616422a40a98a5a3c4ff4ed9d0066f3b4c951fa286018ecdf0750194ef"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5a9a0d155deafd9448baff28c08e150d9b24ff010e899311ddd63c45c2445e28"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:ddaea91abf8b0d13443f6dac52e89051a5063c7d014710dcb4d4abb2ff811a59"}, - {file = "wrapt-1.14.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:36f582d0c6bc99d5f39cd3ac2a9062e57f3cf606ade29a0a0d6b323462f4dd87"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:7ef58fb89674095bfc57c4069e95d7a31cfdc0939e2a579882ac7d55aadfd2a1"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:e2f83e18fe2f4c9e7db597e988f72712c0c3676d337d8b101f6758107c42425b"}, - {file = "wrapt-1.14.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:ee2b1b1769f6707a8a445162ea16dddf74285c3964f605877a20e38545c3c462"}, - {file = 
"wrapt-1.14.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:833b58d5d0b7e5b9832869f039203389ac7cbf01765639c7309fd50ef619e0b1"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80bb5c256f1415f747011dc3604b59bc1f91c6e7150bd7db03b19170ee06b320"}, - {file = "wrapt-1.14.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07f7a7d0f388028b2df1d916e94bbb40624c59b48ecc6cbc232546706fac74c2"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02b41b633c6261feff8ddd8d11c711df6842aba629fdd3da10249a53211a72c4"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fe803deacd09a233e4762a1adcea5db5d31e6be577a43352936179d14d90069"}, - {file = "wrapt-1.14.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:257fd78c513e0fb5cdbe058c27a0624c9884e735bbd131935fd49e9fe719d310"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4fcc4649dc762cddacd193e6b55bc02edca674067f5f98166d7713b193932b7f"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:11871514607b15cfeb87c547a49bca19fde402f32e2b1c24a632506c0a756656"}, - {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, - {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, - {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, - {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:a85d2b46be66a71bedde836d9e41859879cc54a2a04fad1191eb50c2066f6e9d"}, - {file = "wrapt-1.14.1-cp35-cp35m-win32.whl", hash = "sha256:dbcda74c67263139358f4d188ae5faae95c30929281bc6866d00573783c422b7"}, - {file = "wrapt-1.14.1-cp35-cp35m-win_amd64.whl", hash = "sha256:b21bb4c09ffabfa0e85e3a6b623e19b80e7acd709b9f91452b8297ace2a8ab00"}, - {file = "wrapt-1.14.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:9e0fd32e0148dd5dea6af5fee42beb949098564cc23211a88d799e434255a1f4"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9736af4641846491aedb3c3f56b9bc5568d92b0692303b5a305301a95dfd38b1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b02d65b9ccf0ef6c34cba6cf5bf2aab1bb2f49c6090bafeecc9cd81ad4ea1c1"}, - {file = "wrapt-1.14.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21ac0156c4b089b330b7666db40feee30a5d52634cc4560e1905d6529a3897ff"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:9f3e6f9e05148ff90002b884fbc2a86bd303ae847e472f44ecc06c2cd2fcdb2d"}, - {file = "wrapt-1.14.1-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:6e743de5e9c3d1b7185870f480587b75b1cb604832e380d64f9504a0535912d1"}, - {file = 
"wrapt-1.14.1-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:d79d7d5dc8a32b7093e81e97dad755127ff77bcc899e845f41bf71747af0c569"}, - {file = "wrapt-1.14.1-cp36-cp36m-win32.whl", hash = "sha256:81b19725065dcb43df02b37e03278c011a09e49757287dca60c5aecdd5a0b8ed"}, - {file = "wrapt-1.14.1-cp36-cp36m-win_amd64.whl", hash = "sha256:b014c23646a467558be7da3d6b9fa409b2c567d2110599b7cf9a0c5992b3b471"}, - {file = "wrapt-1.14.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:88bd7b6bd70a5b6803c1abf6bca012f7ed963e58c68d76ee20b9d751c74a3248"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5901a312f4d14c59918c221323068fad0540e34324925c8475263841dbdfe68"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d77c85fedff92cf788face9bfa3ebaa364448ebb1d765302e9af11bf449ca36d"}, - {file = "wrapt-1.14.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d649d616e5c6a678b26d15ece345354f7c2286acd6db868e65fcc5ff7c24a77"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7d2872609603cb35ca513d7404a94d6d608fc13211563571117046c9d2bcc3d7"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:ee6acae74a2b91865910eef5e7de37dc6895ad96fa23603d1d27ea69df545015"}, - {file = "wrapt-1.14.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:2b39d38039a1fdad98c87279b48bc5dce2c0ca0d73483b12cb72aa9609278e8a"}, - {file = "wrapt-1.14.1-cp37-cp37m-win32.whl", hash = "sha256:60db23fa423575eeb65ea430cee741acb7c26a1365d103f7b0f6ec412b893853"}, - {file = "wrapt-1.14.1-cp37-cp37m-win_amd64.whl", hash = "sha256:709fe01086a55cf79d20f741f39325018f4df051ef39fe921b1ebe780a66184c"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8c0ce1e99116d5ab21355d8ebe53d9460366704ea38ae4d9f6933188f327b456"}, - {file = "wrapt-1.14.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e3fb1677c720409d5f671e39bac6c9e0e422584e5f518bfd50aa4cbbea02433f"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642c2e7a804fcf18c222e1060df25fc210b9c58db7c91416fb055897fc27e8cc"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b7c050ae976e286906dd3f26009e117eb000fb2cf3533398c5ad9ccc86867b1"}, - {file = "wrapt-1.14.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef3f72c9666bba2bab70d2a8b79f2c6d2c1a42a7f7e2b0ec83bb2f9e383950af"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01c205616a89d09827986bc4e859bcabd64f5a0662a7fe95e0d359424e0e071b"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5a0f54ce2c092aaf439813735584b9537cad479575a09892b8352fea5e988dc0"}, - {file = "wrapt-1.14.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2cf71233a0ed05ccdabe209c606fe0bac7379fdcf687f39b944420d2a09fdb57"}, - {file = "wrapt-1.14.1-cp38-cp38-win32.whl", hash = "sha256:aa31fdcc33fef9eb2552cbcbfee7773d5a6792c137b359e82879c101e98584c5"}, - {file = "wrapt-1.14.1-cp38-cp38-win_amd64.whl", hash = "sha256:d1967f46ea8f2db647c786e78d8cc7e4313dbd1b0aca360592d8027b8508e24d"}, - {file = "wrapt-1.14.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3232822c7d98d23895ccc443bbdf57c7412c5a65996c30442ebe6ed3df335383"}, - {file = 
"wrapt-1.14.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:988635d122aaf2bdcef9e795435662bcd65b02f4f4c1ae37fbee7401c440b3a7"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cca3c2cdadb362116235fdbd411735de4328c61425b0aa9f872fd76d02c4e86"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d52a25136894c63de15a35bc0bdc5adb4b0e173b9c0d07a2be9d3ca64a332735"}, - {file = "wrapt-1.14.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40e7bc81c9e2b2734ea4bc1aceb8a8f0ceaac7c5299bc5d69e37c44d9081d43b"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9b7a708dd92306328117d8c4b62e2194d00c365f18eff11a9b53c6f923b01e3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6a9a25751acb379b466ff6be78a315e2b439d4c94c1e99cb7266d40a537995d3"}, - {file = "wrapt-1.14.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:34aa51c45f28ba7f12accd624225e2b1e5a3a45206aa191f6f9aac931d9d56fe"}, - {file = "wrapt-1.14.1-cp39-cp39-win32.whl", hash = "sha256:dee0ce50c6a2dd9056c20db781e9c1cfd33e77d2d569f5d1d9321c641bb903d5"}, - {file = "wrapt-1.14.1-cp39-cp39-win_amd64.whl", hash = "sha256:dee60e1de1898bde3b238f18340eec6148986da0455d8ba7848d50470a7a32fb"}, - {file = "wrapt-1.14.1.tar.gz", hash = "sha256:380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d"}, + {file = "wrapt-1.15.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46"}, + {file = "wrapt-1.15.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e"}, + {file = "wrapt-1.15.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923"}, + {file = "wrapt-1.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727"}, + {file = "wrapt-1.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7"}, + {file = 
"wrapt-1.15.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90"}, + {file = "wrapt-1.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975"}, + {file = "wrapt-1.15.0-cp310-cp310-win32.whl", hash = "sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1"}, + {file = "wrapt-1.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7"}, + {file = "wrapt-1.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e"}, + {file = "wrapt-1.15.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92"}, + {file = "wrapt-1.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98"}, + {file = "wrapt-1.15.0-cp311-cp311-win32.whl", hash = "sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416"}, + {file = "wrapt-1.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb"}, + {file = "wrapt-1.15.0-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248"}, + {file = "wrapt-1.15.0-cp35-cp35m-win32.whl", hash = "sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559"}, + {file = "wrapt-1.15.0-cp35-cp35m-win_amd64.whl", hash = "sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639"}, + {file = "wrapt-1.15.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364"}, + {file = "wrapt-1.15.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418"}, + {file = "wrapt-1.15.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2"}, + {file = "wrapt-1.15.0-cp36-cp36m-win32.whl", hash = "sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1"}, + {file = "wrapt-1.15.0-cp36-cp36m-win_amd64.whl", hash = "sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420"}, + {file = "wrapt-1.15.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e"}, + {file = "wrapt-1.15.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034"}, + {file = "wrapt-1.15.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653"}, + {file = "wrapt-1.15.0-cp37-cp37m-win32.whl", hash = "sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0"}, + {file = "wrapt-1.15.0-cp37-cp37m-win_amd64.whl", hash = "sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145"}, + {file = "wrapt-1.15.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b"}, + {file = "wrapt-1.15.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = 
"sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094"}, + {file = "wrapt-1.15.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7"}, + {file = "wrapt-1.15.0-cp38-cp38-win32.whl", hash = "sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b"}, + {file = "wrapt-1.15.0-cp38-cp38-win_amd64.whl", hash = "sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86"}, + {file = "wrapt-1.15.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc"}, + {file = "wrapt-1.15.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8"}, + {file = "wrapt-1.15.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9"}, + {file = "wrapt-1.15.0-cp39-cp39-win32.whl", hash = "sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff"}, + {file = "wrapt-1.15.0-cp39-cp39-win_amd64.whl", hash = "sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6"}, + {file = "wrapt-1.15.0-py3-none-any.whl", hash = "sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640"}, + {file = "wrapt-1.15.0.tar.gz", hash = "sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a"}, ] diff --git a/server/pyproject.toml b/server/pyproject.toml index d3a8c112..63a3df6c 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -1,11 +1,11 @@ [tool.poetry] -name = "text-generation" -version = "0.2.1" +name = "text-generation-server" +version = "0.4.0" description = "Text Generation Inference Python gRPC Server" authors = ["Olivier Dehaene "] [tool.poetry.scripts] -text-generation-server = 'text_generation.cli:app' +text-generation-server = 'text_generation_server.cli:app' [tool.poetry.dependencies] python = "^3.9" @@ -22,6 +22,7 @@ loguru = "^0.6.0" opentelemetry-api = "^1.15.0" opentelemetry-exporter-otlp = "^1.15.0" opentelemetry-instrumentation-grpc = "^0.36b0" +hf-transfer = "^0.1.2" [tool.poetry.extras] bnb = ["bitsandbytes"] diff --git a/server/tests/conftest.py b/server/tests/conftest.py index 9fae8ee1..16d2c408 100644 --- a/server/tests/conftest.py +++ b/server/tests/conftest.py @@ -1,6 +1,6 @@ import pytest -from text_generation.pb import generate_pb2 +from text_generation_server.pb import generate_pb2 @pytest.fixture @@ -10,6 +10,7 @@ def default_pb_parameters(): repetition_penalty=1.0, 
top_k=0, top_p=1.0, + typical_p=1.0, do_sample=False, ) diff --git a/server/tests/models/test_bloom.py b/server/tests/models/test_bloom.py index b06d57f5..2b8ef5f8 100644 --- a/server/tests/models/test_bloom.py +++ b/server/tests/models/test_bloom.py @@ -4,9 +4,9 @@ import torch from copy import copy from transformers import AutoTokenizer -from text_generation.pb import generate_pb2 -from text_generation.models.causal_lm import CausalLMBatch -from text_generation.models.bloom import BloomCausalLMBatch, BLOOM +from text_generation_server.pb import generate_pb2 +from text_generation_server.models.causal_lm import CausalLMBatch +from text_generation_server.models.bloom import BloomCausalLMBatch, BLOOM @pytest.fixture(scope="session") @@ -24,7 +24,6 @@ def default_pb_request(default_pb_parameters, default_pb_stop_parameters): return generate_pb2.Request( id=0, inputs="Test", - input_length=1, parameters=default_pb_parameters, stopping_parameters=default_pb_stop_parameters, ) @@ -65,8 +64,8 @@ def test_batch_from_pb(default_pb_batch, default_bloom_batch): assert batch.input_ids[0][-1] == 10264 assert torch.all(batch.input_ids[0][:-1] == 3) - assert batch.attention_mask[0][-1] == 1 - assert torch.all(batch.attention_mask[0][:-1] == 0) + assert batch.attention_mask[0][0] == 1 + assert torch.all(batch.attention_mask[0][1:] == 0) assert batch.past_key_values is None @@ -77,7 +76,7 @@ def test_batch_from_pb(default_pb_batch, default_bloom_batch): assert batch.size == default_pb_batch.size assert len(batch.next_token_choosers) == len(batch.stopping_criterias) == batch.size - assert batch.max_sequence_length == batch.input_lengths[0] + assert batch.max_input_length == batch.input_lengths[0] def test_batch_concatenate_no_prefill(default_bloom_batch): @@ -98,22 +97,19 @@ def test_causal_lm_generate_token(default_bloom, default_bloom_batch): assert not next_batch.keys_head_dim_last assert len(next_batch.all_input_ids) == next_batch.size - assert ( - len(next_batch.all_input_ids[0]) - == len(next_batch.attention_mask[0]) - == sequence_length + 1 - ) + assert len(next_batch.all_input_ids[0]) == sequence_length + 1 + assert len(next_batch.attention_mask[0]) == 11 assert torch.all(next_batch.all_input_ids[0][-2:] == 10264) assert torch.all(next_batch.all_input_ids[0][:-2] == 3) - assert torch.all(next_batch.attention_mask[0][-2:] == 1) - assert torch.all(next_batch.attention_mask[0][:-2] == 0) + assert torch.all(next_batch.attention_mask[0][:2] == 1) + assert torch.all(next_batch.attention_mask[0][2:] == 0) assert next_batch.input_ids.shape == (next_batch.size, 1) assert next_batch.input_ids[0, 0] == 10264 assert next_batch.input_lengths == [2] - assert next_batch.max_sequence_length == next_batch.input_lengths[0] + assert next_batch.max_input_length == next_batch.input_lengths[0] assert next_batch.past_key_values is not None assert all( @@ -213,15 +209,19 @@ def test_batch_concatenate( assert torch.equal(next_batch.all_input_ids[1], next_batch_1.all_input_ids[0]) assert torch.equal(next_batch.all_input_ids[2], next_batch_1.all_input_ids[1]) - assert torch.all(next_batch.attention_mask[0] == 1) - assert torch.all(next_batch.attention_mask[1:, -2:] == 1) - assert torch.all(next_batch.attention_mask[1:, :-2] == 0) + assert torch.all( + next_batch.attention_mask[0, : -next_batch.padding_right_offset] == 1 + ) + assert torch.all( + next_batch.attention_mask[1:, 1 : -next_batch.padding_right_offset] == 1 + ) + assert torch.all(next_batch.attention_mask[1:, 3:] == 0) assert next_batch.batch_id == 0 assert 
torch.all(next_batch.input_ids == 10264) assert next_batch.input_lengths == [3, 2, 2] - assert next_batch.max_sequence_length == 3 + assert next_batch.max_input_length == 3 assert next_batch.requests[0] == next_batch_0.requests[0] assert next_batch.requests[1:] == next_batch_1.requests diff --git a/server/tests/models/test_causal_lm.py b/server/tests/models/test_causal_lm.py index 6a822815..76617b62 100644 --- a/server/tests/models/test_causal_lm.py +++ b/server/tests/models/test_causal_lm.py @@ -4,8 +4,8 @@ import torch from copy import copy from transformers import AutoTokenizer -from text_generation.pb import generate_pb2 -from text_generation.models.causal_lm import CausalLM, CausalLMBatch +from text_generation_server.pb import generate_pb2 +from text_generation_server.models.causal_lm import CausalLM, CausalLMBatch @pytest.fixture(scope="session") @@ -25,7 +25,6 @@ def default_pb_request(default_pb_parameters, default_pb_stop_parameters): return generate_pb2.Request( id=0, inputs="Test", - input_length=1, parameters=default_pb_parameters, stopping_parameters=default_pb_stop_parameters, ) @@ -62,8 +61,8 @@ def test_batch_from_pb(default_pb_batch, default_causal_lm_batch): assert batch.input_ids[0][-1] == 14402 assert torch.all(batch.input_ids[0][:-1] == 50256) - assert batch.attention_mask[0][-1] == 1 - assert torch.all(batch.attention_mask[0][:-1] == 0) + assert batch.attention_mask[0, 0] == 1 + assert torch.all(batch.attention_mask[0, 1:] == 0) assert batch.past_key_values is None @@ -74,7 +73,7 @@ def test_batch_from_pb(default_pb_batch, default_causal_lm_batch): assert batch.size == default_pb_batch.size assert len(batch.next_token_choosers) == len(batch.stopping_criterias) == batch.size - assert batch.max_sequence_length == batch.input_lengths[0] + assert batch.max_input_length == batch.input_lengths[0] def test_batch_concatenate_no_prefill(default_causal_lm_batch): @@ -94,23 +93,20 @@ def test_causal_lm_generate_token(default_causal_lm, default_causal_lm_batch): assert isinstance(next_batch, CausalLMBatch) assert len(next_batch.all_input_ids) == next_batch.size - assert ( - len(next_batch.all_input_ids[0]) - == len(next_batch.attention_mask[0]) - == sequence_length + 1 - ) + assert len(next_batch.all_input_ids[0]) == sequence_length + 1 + assert len(next_batch.attention_mask[0]) == 11 assert next_batch.all_input_ids[0][-1] == 13 assert next_batch.all_input_ids[0][-2] == 14402 assert torch.all(next_batch.all_input_ids[0][:-2] == 50256) - assert torch.all(next_batch.attention_mask[0][-2:] == 1) - assert torch.all(next_batch.attention_mask[0][:-2] == 0) + assert torch.all(next_batch.attention_mask[0][0:2] == 1) + assert torch.all(next_batch.attention_mask[0][2:] == 0) assert next_batch.input_ids.shape == (next_batch.size, 1) assert next_batch.input_ids[0, 0] == 13 assert next_batch.input_lengths == [2] - assert next_batch.max_sequence_length == next_batch.input_lengths[0] + assert next_batch.max_input_length == next_batch.input_lengths[0] assert next_batch.past_key_values is not None assert all( @@ -210,16 +206,20 @@ def test_batch_concatenate( assert torch.equal(next_batch.all_input_ids[1], next_batch_1.all_input_ids[0]) assert torch.equal(next_batch.all_input_ids[2], next_batch_1.all_input_ids[1]) - assert torch.all(next_batch.attention_mask[0] == 1) - assert torch.all(next_batch.attention_mask[1:, -2:] == 1) - assert torch.all(next_batch.attention_mask[1:, :-2] == 0) + assert torch.all( + next_batch.attention_mask[0, : -next_batch.padding_right_offset] == 1 + ) + assert 
torch.all( + next_batch.attention_mask[1:, 1 : -next_batch.padding_right_offset] == 1 + ) + assert torch.all(next_batch.attention_mask[1:, 3:] == 0) assert next_batch.batch_id == 0 assert next_batch.input_ids[0, 0] == 12355 assert torch.all(next_batch.input_ids[1:] == 13) assert next_batch.input_lengths == [3, 2, 2] - assert next_batch.max_sequence_length == 3 + assert next_batch.max_input_length == 3 assert next_batch.requests[0] == next_batch_0.requests[0] assert next_batch.requests[1:] == next_batch_1.requests diff --git a/server/tests/models/test_santacoder.py b/server/tests/models/test_santacoder.py index 1596e413..753ff5fc 100644 --- a/server/tests/models/test_santacoder.py +++ b/server/tests/models/test_santacoder.py @@ -1,8 +1,8 @@ import pytest -from text_generation.pb import generate_pb2 -from text_generation.models.causal_lm import CausalLMBatch -from text_generation.models.santacoder import SantaCoder +from text_generation_server.pb import generate_pb2 +from text_generation_server.models.causal_lm import CausalLMBatch +from text_generation_server.models.santacoder import SantaCoder @pytest.fixture(scope="session") @@ -15,7 +15,6 @@ def default_pb_request(default_pb_parameters, default_pb_stop_parameters): return generate_pb2.Request( id=0, inputs="def", - input_length=1, parameters=default_pb_parameters, stopping_parameters=default_pb_stop_parameters, ) @@ -31,7 +30,6 @@ def default_fim_pb_request(default_pb_parameters, default_pb_stop_parameters): return generate_pb2.Request( id=0, inputs="defworld", - input_length=5, parameters=default_pb_parameters, stopping_parameters=default_pb_stop_parameters, ) diff --git a/server/tests/models/test_seq2seq_lm.py b/server/tests/models/test_seq2seq_lm.py index 22c6ac9c..2d86c44b 100644 --- a/server/tests/models/test_seq2seq_lm.py +++ b/server/tests/models/test_seq2seq_lm.py @@ -5,8 +5,8 @@ from copy import copy from transformers import AutoTokenizer -from text_generation.pb import generate_pb2 -from text_generation.models.seq2seq_lm import Seq2SeqLM, Seq2SeqLMBatch +from text_generation_server.pb import generate_pb2 +from text_generation_server.models.seq2seq_lm import Seq2SeqLM, Seq2SeqLMBatch @pytest.fixture(scope="session") @@ -28,7 +28,6 @@ def default_pb_request(default_pb_parameters, default_pb_stop_parameters): return generate_pb2.Request( id=0, inputs="Test", - input_length=2, parameters=default_pb_parameters, stopping_parameters=default_pb_stop_parameters, ) @@ -106,7 +105,7 @@ def test_seq2seq_lm_generate_token(default_seq2seq_lm, default_seq2seq_lm_batch) assert len(generations) == len(next_batch) assert isinstance(next_batch, Seq2SeqLMBatch) - assert torch.equal(next_batch.input_ids, default_seq2seq_lm_batch.input_ids) + assert next_batch.input_ids is None assert torch.equal( next_batch.attention_mask, default_seq2seq_lm_batch.attention_mask ) @@ -148,7 +147,7 @@ def test_seq2seq_lm_generate_token(default_seq2seq_lm, default_seq2seq_lm_batch) assert all([generation.generated_text is None for generation in generations]) assert all([len(generation.prefill_tokens) == 1 for generation in generations]) assert all([generation.token_id.item() == 259 for generation in generations]) - assert all([generation.token_text == "" for generation in generations]) + assert all([generation.token_text == " " for generation in generations]) assert generations[0].request_id == 0 @@ -220,11 +219,6 @@ def test_batch_concatenate( assert next_batch.batch_id == 0 - assert torch.all(next_batch.input_ids[:, 0] == 4268) - assert 
torch.all(next_batch.input_ids[:, 1] == 1) - - assert torch.all(next_batch.attention_mask == 1) - assert torch.equal( next_batch.decoder_input_ids[0], next_batch_0.decoder_input_ids[0] ) @@ -233,9 +227,10 @@ def test_batch_concatenate( next_batch.decoder_input_ids[1:, -2:], next_batch_1.decoder_input_ids ) - assert torch.all(next_batch.decoder_attention_mask[0] == 1) + assert torch.all(next_batch.decoder_attention_mask[0, :3] == 1) + assert torch.all(next_batch.decoder_attention_mask[0, 3:] == 0) assert torch.all(next_batch.decoder_attention_mask[1:, 0] == 0) - assert torch.all(next_batch.decoder_attention_mask[1:, -2:] == 1) + assert torch.all(next_batch.decoder_attention_mask[1:, 1:3] == 1) assert torch.equal( next_batch.encoder_last_hidden_state[0], diff --git a/server/tests/utils/test_convert.py b/server/tests/utils/test_convert.py new file mode 100644 index 00000000..7dfe6a1e --- /dev/null +++ b/server/tests/utils/test_convert.py @@ -0,0 +1,21 @@ +from text_generation_server.utils.hub import ( + download_weights, + weight_hub_files, + weight_files, +) + +from text_generation_server.utils.convert import convert_files + + +def test_convert_files(): + model_id = "bigscience/bloom-560m" + pt_filenames = weight_hub_files(model_id, extension=".bin") + local_pt_files = download_weights(pt_filenames, model_id) + local_st_files = [ + p.parent / f"{p.stem.lstrip('pytorch_')}.safetensors" for p in local_pt_files + ] + convert_files(local_pt_files, local_st_files) + + found_st_files = weight_files(model_id) + + assert all([p in found_st_files for p in local_st_files]) diff --git a/server/tests/utils/test_hub.py b/server/tests/utils/test_hub.py new file mode 100644 index 00000000..fac9a64d --- /dev/null +++ b/server/tests/utils/test_hub.py @@ -0,0 +1,40 @@ +import pytest + +from text_generation_server.utils.hub import ( + weight_hub_files, + download_weights, + weight_files, + EntryNotFoundError, + LocalEntryNotFoundError, + RevisionNotFoundError, +) + + +def test_weight_hub_files(): + filenames = weight_hub_files("bigscience/bloom-560m") + assert filenames == ["model.safetensors"] + + +def test_weight_hub_files_llm(): + filenames = weight_hub_files("bigscience/bloom") + assert filenames == [f"model_{i:05d}-of-00072.safetensors" for i in range(1, 73)] + + +def test_weight_hub_files_empty(): + with pytest.raises(EntryNotFoundError): + weight_hub_files("bigscience/bloom", extension=".errors") + + +def test_download_weights(): + model_id = "bigscience/bloom-560m" + filenames = weight_hub_files(model_id) + files = download_weights(filenames, model_id) + local_files = weight_files("bigscience/bloom-560m") + assert files == local_files + + +def test_weight_files_error(): + with pytest.raises(RevisionNotFoundError): + weight_files("bigscience/bloom-560m", revision="error") + with pytest.raises(LocalEntryNotFoundError): + weight_files("bert-base-uncased") diff --git a/server/tests/test_utils.py b/server/tests/utils/test_tokens.py similarity index 52% rename from server/tests/test_utils.py rename to server/tests/utils/test_tokens.py index ffe9be65..3883ad97 100644 --- a/server/tests/test_utils.py +++ b/server/tests/utils/test_tokens.py @@ -1,14 +1,6 @@ -import pytest - -from huggingface_hub.utils import RevisionNotFoundError - -from text_generation.utils import ( - weight_hub_files, - download_weights, - weight_files, +from text_generation_server.utils.tokens import ( StopSequenceCriteria, StoppingCriteria, - LocalEntryNotFoundError, FinishReason, ) @@ -41,31 +33,3 @@ def test_stopping_criteria_max(): 
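Taken together, the new `server/tests/utils/test_hub.py` above pins down the reshaped hub API: `download_weights` now takes the filename list first and the model id second, and a query matching no files raises `EntryNotFoundError` instead of returning an empty list. A hedged end-to-end sketch using only the calls exercised by these tests:

from text_generation_server.utils.hub import (
    download_weights,
    weight_files,
    weight_hub_files,
)

model_id = "bigscience/bloom-560m"

# 1. List the .safetensors filenames published on the Hub for this model.
filenames = weight_hub_files(model_id)

# 2. Download them; note the new argument order (filenames, model_id).
downloaded = download_weights(filenames, model_id)

# 3. weight_files resolves the same weights from the local cache.
assert downloaded == weight_files(model_id)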
assert criteria(1, "") == (False, None) assert criteria(1, "") == (False, None) assert criteria(1, "") == (True, FinishReason.FINISH_REASON_LENGTH) - - -def test_weight_hub_files(): - filenames = weight_hub_files("bigscience/bloom-560m") - assert filenames == ["model.safetensors"] - - -def test_weight_hub_files_llm(): - filenames = weight_hub_files("bigscience/bloom") - assert filenames == [f"model_{i:05d}-of-00072.safetensors" for i in range(1, 73)] - - -def test_weight_hub_files_empty(): - filenames = weight_hub_files("bigscience/bloom", extension=".errors") - assert filenames == [] - - -def test_download_weights(): - files = download_weights("bigscience/bloom-560m") - local_files = weight_files("bigscience/bloom-560m") - assert files == local_files - - -def test_weight_files_error(): - with pytest.raises(RevisionNotFoundError): - weight_files("bigscience/bloom-560m", revision="error") - with pytest.raises(LocalEntryNotFoundError): - weight_files("bert-base-uncased") diff --git a/server/text_generation/cli.py b/server/text_generation/cli.py deleted file mode 100644 index e9c8ea92..00000000 --- a/server/text_generation/cli.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -import sys -import typer - -from pathlib import Path -from loguru import logger -from typing import Optional - -from text_generation import server, utils -from text_generation.tracing import setup_tracing - -app = typer.Typer() - - -@app.command() -def serve( - model_id: str, - revision: Optional[str] = None, - sharded: bool = False, - quantize: bool = False, - uds_path: Path = "/tmp/text-generation", - logger_level: str = "INFO", - json_output: bool = False, - otlp_endpoint: Optional[str] = None, -): - if sharded: - assert ( - os.getenv("RANK", None) is not None - ), "RANK must be set when sharded is True" - assert ( - os.getenv("WORLD_SIZE", None) is not None - ), "WORLD_SIZE must be set when sharded is True" - assert ( - os.getenv("MASTER_ADDR", None) is not None - ), "MASTER_ADDR must be set when sharded is True" - assert ( - os.getenv("MASTER_PORT", None) is not None - ), "MASTER_PORT must be set when sharded is True" - - # Remove default handler - logger.remove() - logger.add( - sys.stdout, - format="{message}", - filter="text_generation", - level=logger_level, - serialize=json_output, - backtrace=True, - diagnose=False, - ) - # Setup OpenTelemetry distributed tracing - if otlp_endpoint is not None: - setup_tracing(shard=os.getenv("RANK", 0), otlp_endpoint=otlp_endpoint) - - server.serve(model_id, revision, sharded, quantize, uds_path) - - -@app.command() -def download_weights( - model_id: str, - revision: Optional[str] = None, - extension: str = ".safetensors", -): - utils.download_weights(model_id, revision, extension) - - -if __name__ == "__main__": - app() diff --git a/server/text_generation/models/model.py b/server/text_generation/models/model.py deleted file mode 100644 index ef6a5682..00000000 --- a/server/text_generation/models/model.py +++ /dev/null @@ -1,24 +0,0 @@ -import torch - -from abc import ABC, abstractmethod -from typing import List, Tuple, Optional, TypeVar, Type -from transformers import PreTrainedTokenizerBase - -from text_generation.models.types import Batch, GeneratedText - -B = TypeVar("B", bound=Batch) - - -class Model(ABC): - def __init__(self, tokenizer: PreTrainedTokenizerBase, device: torch.device): - self.tokenizer = tokenizer - self.device = device - - @property - @abstractmethod - def batch_type(self) -> Type[B]: - raise NotImplementedError - - @abstractmethod - def generate_token(self, 
batch: B) -> Tuple[List[GeneratedText], Optional[B]]: - raise NotImplementedError diff --git a/server/text_generation/utils.py b/server/text_generation/utils.py deleted file mode 100644 index 3b3f08c7..00000000 --- a/server/text_generation/utils.py +++ /dev/null @@ -1,283 +0,0 @@ -import concurrent -import os -import re -import torch -import torch.distributed - -from datetime import timedelta - -from concurrent.futures import ThreadPoolExecutor -from functools import partial -from pathlib import Path -from huggingface_hub import HfApi, hf_hub_download, _CACHED_NO_EXIST -from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE -from huggingface_hub.utils import LocalEntryNotFoundError -from tqdm import tqdm -from typing import List, Optional, Tuple -from transformers import PreTrainedTokenizerBase -from transformers.generation.logits_process import ( - LogitsProcessorList, - RepetitionPenaltyLogitsProcessor, - TemperatureLogitsWarper, - TopPLogitsWarper, - TopKLogitsWarper, -) - -from text_generation.pb import generate_pb2 -from text_generation.pb.generate_pb2 import FinishReason - -WEIGHTS_CACHE_OVERRIDE = os.getenv("WEIGHTS_CACHE_OVERRIDE", None) - - -class Sampling: - def __init__(self, seed: int, device: str = "cpu"): - self.generator = torch.Generator(device) - self.generator.manual_seed(seed) - self.seed = seed - - def __call__(self, logits): - probs = torch.nn.functional.softmax(logits) - next_tokens = torch.multinomial(probs, num_samples=1, generator=self.generator) - return next_tokens - - -class Greedy: - def __call__(self, logits): - return logits.argmax() - - -class NextTokenChooser: - def __init__( - self, - temperature=1.0, - repetition_penalty=1.0, - top_k=None, - top_p=None, - do_sample=False, - seed=0, - device="cpu", - ): - warpers = LogitsProcessorList() - # the following idea is largely copied from this PR: https://github.com/huggingface/transformers/pull/5420/files - # all samplers can be found in `generation_utils_samplers.py` - sampling = do_sample - if temperature is not None and temperature != 1.0: - temperature = float(temperature) - warpers.append(TemperatureLogitsWarper(temperature)) - sampling = True - if top_k is not None and top_k != 0: - warpers.append(TopKLogitsWarper(top_k=top_k)) - sampling = True - if top_p is not None and top_p < 1.0: - warpers.append(TopPLogitsWarper(top_p=top_p)) - sampling = True - if repetition_penalty is not None and repetition_penalty != 1.0: - warpers.append(RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty)) - - self.warpers = warpers - self.choice = Sampling(seed, device) if sampling else Greedy() - - def __call__(self, input_ids, scores): - # Warp logits - scores = self.warpers(input_ids, scores) - - # Compute logprobs - logprobs = torch.log_softmax(scores, -1) - - # Choose tokens - next_id = self.choice(scores[-1]) - - return next_id.view(1, 1), logprobs - - @classmethod - def from_pb( - cls, pb: generate_pb2.NextTokenChooserParameters, device: torch.device - ) -> "NextTokenChooser": - return NextTokenChooser( - temperature=pb.temperature, - repetition_penalty=pb.repetition_penalty, - top_k=pb.top_k, - top_p=pb.top_p, - do_sample=pb.do_sample, - seed=pb.seed, - device=device, - ) - - -class StopSequenceCriteria: - def __init__(self, stop_sequence: str): - self.regex = re.compile(f".*{stop_sequence}$") - - def __call__(self, output: str) -> bool: - if self.regex.findall(output): - return True - return False - - -class StoppingCriteria: - def __init__( - self, - eos_token_id: int, - stop_sequence_criterias: 
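For readers skimming the deleted `utils.py` above: the module is being split up, not removed; the stopping criteria, for instance, resurface in `text_generation_server/utils/tokens.py`. The `NextTokenChooser` pattern it carries over, building a `LogitsProcessorList` and only sampling when some warper actually constrains the distribution, looks roughly like this minimal sketch (parameter values invented):

import torch
from transformers.generation.logits_process import (
    LogitsProcessorList,
    TemperatureLogitsWarper,
    TopKLogitsWarper,
    TopPLogitsWarper,
)

temperature, top_k, top_p = 0.8, 50, 0.95
warpers = LogitsProcessorList()
sampling = False

# Each warper that actually changes the distribution flips us
# from greedy decoding to sampling, mirroring the class above.
if temperature != 1.0:
    warpers.append(TemperatureLogitsWarper(temperature))
    sampling = True
if top_k != 0:
    warpers.append(TopKLogitsWarper(top_k=top_k))
    sampling = True
if top_p < 1.0:
    warpers.append(TopPLogitsWarper(top_p=top_p))
    sampling = True

input_ids = torch.tensor([[0, 1, 2]])
scores = torch.randn(1, 32000)       # fake vocabulary logits
scores = warpers(input_ids, scores)  # apply in registration order
next_id = (
    torch.multinomial(torch.softmax(scores, -1), num_samples=1)
    if sampling
    else scores.argmax(-1)
)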
List[StopSequenceCriteria], - max_new_tokens=20, - ): - self.eos_token_id = eos_token_id - self.stop_sequence_criterias = stop_sequence_criterias - self.max_new_tokens = max_new_tokens - self.current_tokens = 0 - self.current_output = "" - - def __call__(self, last_token: int, last_output: str) -> Tuple[bool, Optional[str]]: - self.current_tokens += 1 - if self.current_tokens >= self.max_new_tokens: - return True, FinishReason.FINISH_REASON_LENGTH - - if last_token == self.eos_token_id: - return True, FinishReason.FINISH_REASON_EOS_TOKEN - - self.current_output += last_output - for stop_sequence_criteria in self.stop_sequence_criterias: - if stop_sequence_criteria(self.current_output): - return True, FinishReason.FINISH_REASON_STOP_SEQUENCE - - return False, None - - @classmethod - def from_pb( - cls, - pb: generate_pb2.StoppingCriteriaParameters, - tokenizer: PreTrainedTokenizerBase, - ) -> "StoppingCriteria": - stop_sequence_criterias = [ - StopSequenceCriteria(sequence) for sequence in pb.stop_sequences - ] - return StoppingCriteria( - tokenizer.eos_token_id, stop_sequence_criterias, pb.max_new_tokens - ) - - -def initialize_torch_distributed(): - rank = int(os.getenv("RANK", "0")) - world_size = int(os.getenv("WORLD_SIZE", "1")) - - if torch.cuda.is_available(): - from torch.distributed import ProcessGroupNCCL - - # Set the device id. - assert world_size <= torch.cuda.device_count(), "Each process is one gpu" - device = rank % torch.cuda.device_count() - torch.cuda.set_device(device) - backend = "nccl" - options = ProcessGroupNCCL.Options() - options.is_high_priority_stream = True - options._timeout = timedelta(seconds=60) - else: - backend = "gloo" - options = None - - # Call the init process. - torch.distributed.init_process_group( - backend=backend, - world_size=world_size, - rank=rank, - timeout=timedelta(seconds=60), - pg_options=options, - ) - - return torch.distributed.group.WORLD, rank, world_size - - -def weight_hub_files(model_id, revision=None, extension=".safetensors"): - """Get the safetensors filenames on the hub""" - api = HfApi() - info = api.model_info(model_id, revision=revision) - filenames = [s.rfilename for s in info.siblings if s.rfilename.endswith(extension)] - return filenames - - -def try_to_load_from_cache(model_id, revision, filename): - """Try to load a file from the Hugging Face cache""" - if revision is None: - revision = "main" - - object_id = model_id.replace("/", "--") - repo_cache = Path(HUGGINGFACE_HUB_CACHE) / f"models--{object_id}" - - if not repo_cache.is_dir(): - # No cache for this model - return None - - refs_dir = repo_cache / "refs" - snapshots_dir = repo_cache / "snapshots" - no_exist_dir = repo_cache / ".no_exist" - - # Resolve refs (for instance to convert main to the associated commit sha) - if refs_dir.is_dir(): - revision_file = refs_dir / revision - if revision_file.exists(): - with revision_file.open() as f: - revision = f.read() - - # Check if file is cached as "no_exist" - if (no_exist_dir / revision / filename).is_file(): - return _CACHED_NO_EXIST - - # Check if revision folder exists - if not snapshots_dir.exists(): - return None - cached_shas = os.listdir(snapshots_dir) - if revision not in cached_shas: - # No cache for this revision and we won't try to return a random revision - return None - - # Check if file exists in cache - cached_file = snapshots_dir / revision / filename - return str(cached_file) if cached_file.is_file() else None - - -def weight_files(model_id, revision=None, extension=".safetensors"): - """Get the local 
safetensors filenames""" - if WEIGHTS_CACHE_OVERRIDE is not None: - return list(Path(WEIGHTS_CACHE_OVERRIDE).glob(f"*{extension}")) - - filenames = weight_hub_files(model_id, revision, extension) - files = [] - for filename in filenames: - cache_file = try_to_load_from_cache( - model_id, revision=revision, filename=filename - ) - if cache_file is None: - raise LocalEntryNotFoundError( - f"File {filename} of model {model_id} not found in " - f"{os.getenv('HUGGINGFACE_HUB_CACHE', 'the local cache')}. " - f"Please run `text-generation-server download-weights {model_id}` first." - ) - files.append(cache_file) - - return files - - -def download_weights(model_id, revision=None, extension=".safetensors"): - """Download the safetensors files from the hub""" - if WEIGHTS_CACHE_OVERRIDE is not None: - return list(Path(WEIGHTS_CACHE_OVERRIDE).glob(f"*{extension}")) - - filenames = weight_hub_files(model_id, revision, extension) - - download_function = partial( - hf_hub_download, - repo_id=model_id, - local_files_only=False, - ) - - executor = ThreadPoolExecutor(max_workers=5) - futures = [ - executor.submit(download_function, filename=filename, revision=revision) - for filename in filenames - ] - files = [ - future.result() - for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)) - ] - - return files diff --git a/server/text_generation/__init__.py b/server/text_generation_server/__init__.py similarity index 100% rename from server/text_generation/__init__.py rename to server/text_generation_server/__init__.py diff --git a/server/text_generation/cache.py b/server/text_generation_server/cache.py similarity index 90% rename from server/text_generation/cache.py rename to server/text_generation_server/cache.py index 5a3a8d31..72dc4857 100644 --- a/server/text_generation/cache.py +++ b/server/text_generation_server/cache.py @@ -1,6 +1,6 @@ from typing import Dict, Optional, TypeVar -from text_generation.models.types import Batch +from text_generation_server.models.types import Batch B = TypeVar("B", bound=Batch) diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py new file mode 100644 index 00000000..6308ef6b --- /dev/null +++ b/server/text_generation_server/cli.py @@ -0,0 +1,115 @@ +import os +import sys +import typer + +from pathlib import Path +from loguru import logger +from typing import Optional + +from text_generation_server import server, utils +from text_generation_server.tracing import setup_tracing + +app = typer.Typer() + + +@app.command() +def serve( + model_id: str, + revision: Optional[str] = None, + sharded: bool = False, + quantize: bool = False, + uds_path: Path = "/tmp/text-generation", + logger_level: str = "INFO", + json_output: bool = False, + otlp_endpoint: Optional[str] = None, +): + if sharded: + assert ( + os.getenv("RANK", None) is not None + ), "RANK must be set when sharded is True" + assert ( + os.getenv("WORLD_SIZE", None) is not None + ), "WORLD_SIZE must be set when sharded is True" + assert ( + os.getenv("MASTER_ADDR", None) is not None + ), "MASTER_ADDR must be set when sharded is True" + assert ( + os.getenv("MASTER_PORT", None) is not None + ), "MASTER_PORT must be set when sharded is True" + + # Remove default handler + logger.remove() + logger.add( + sys.stdout, + format="{message}", + filter="text_generation_server", + level=logger_level, + serialize=json_output, + backtrace=True, + diagnose=False, + ) + # Setup OpenTelemetry distributed tracing + if otlp_endpoint is not None: + 
setup_tracing(shard=os.getenv("RANK", 0), otlp_endpoint=otlp_endpoint) + + server.serve(model_id, revision, sharded, quantize, uds_path) + + +@app.command() +def download_weights( + model_id: str, + revision: Optional[str] = None, + extension: str = ".safetensors", + logger_level: str = "INFO", + json_output: bool = False, +): + # Remove default handler + logger.remove() + logger.add( + sys.stdout, + format="{message}", + filter="text_generation_server", + level=logger_level, + serialize=json_output, + backtrace=True, + diagnose=False, + ) + + # Test if files were already downloaded + try: + utils.weight_files(model_id, revision, extension) + logger.info( + "Files are already present in the local cache. Skipping download." + ) + return + # Local files not found + except utils.LocalEntryNotFoundError: + pass + + # Download weights directly + try: + filenames = utils.weight_hub_files(model_id, revision, extension) + utils.download_weights(filenames, model_id, revision) + except utils.EntryNotFoundError as e: + if extension != ".safetensors": + raise e + + logger.warning( + f"No safetensors weights found for model {model_id} at revision {revision}. " + f"Converting PyTorch weights instead." + ) + + # Try to see if there are pytorch weights + pt_filenames = utils.weight_hub_files(model_id, revision, ".bin") + # Download pytorch weights + local_pt_files = utils.download_weights(pt_filenames, model_id, revision) + local_st_files = [ + p.parent / f"{p.stem.lstrip('pytorch_')}.safetensors" + for p in local_pt_files + ] + # Convert pytorch weights to safetensors + utils.convert_files(local_pt_files, local_st_files) + + +if __name__ == "__main__": + app() diff --git a/server/text_generation/interceptor.py b/server/text_generation_server/interceptor.py similarity index 100% rename from server/text_generation/interceptor.py rename to server/text_generation_server/interceptor.py diff --git a/server/text_generation/models/__init__.py b/server/text_generation_server/models/__init__.py similarity index 63% rename from server/text_generation/models/__init__.py rename to server/text_generation_server/models/__init__.py index 7445b427..3e2f5c66 100644 --- a/server/text_generation/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -3,14 +3,14 @@ import torch from transformers import AutoConfig from typing import Optional -from text_generation.models.model import Model -from text_generation.models.causal_lm import CausalLM -from text_generation.models.bloom import BLOOM, BLOOMSharded -from text_generation.models.seq2seq_lm import Seq2SeqLM -from text_generation.models.galactica import Galactica, GalacticaSharded -from text_generation.models.santacoder import SantaCoder -from text_generation.models.gpt_neox import GPTNeox, GPTNeoxSharded -from text_generation.models.t5 import T5Sharded +from text_generation_server.models.model import Model +from text_generation_server.models.causal_lm import CausalLM +from text_generation_server.models.bloom import BLOOM, BLOOMSharded +from text_generation_server.models.seq2seq_lm import Seq2SeqLM +from text_generation_server.models.galactica import Galactica, GalacticaSharded +from text_generation_server.models.santacoder import SantaCoder +from text_generation_server.models.gpt_neox import GPTNeoxSharded +from text_generation_server.models.t5 import T5Sharded __all__ = [ "Model", @@ -19,7 +19,6 @@ __all__ = [ "CausalLM", "Galactica", "GalacticaSharded", - "GPTNeox", "GPTNeoxSharded", "Seq2SeqLM", "SantaCoder", @@ -41,6 +40,15 @@ 
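Condensed, the new `download-weights` command above is a three-step fallback: reuse the local cache, else download safetensors from the Hub, else download PyTorch `.bin` weights and convert them. A sketch of that control flow as a hypothetical `ensure_weights` helper (the real logic lives inline in the CLI command, and additionally re-raises when a non-default extension was requested):

from text_generation_server import utils

def ensure_weights(model_id: str, revision=None, extension: str = ".safetensors"):
    # 1. Fast path: converted weights are already in the local cache.
    try:
        return utils.weight_files(model_id, revision, extension)
    except utils.LocalEntryNotFoundError:
        pass
    # 2. Preferred path: safetensors weights exist on the Hub.
    try:
        filenames = utils.weight_hub_files(model_id, revision, extension)
        return utils.download_weights(filenames, model_id, revision)
    except utils.EntryNotFoundError:
        # 3. Fallback: fetch .bin weights and convert them locally.
        pt_filenames = utils.weight_hub_files(model_id, revision, ".bin")
        local_pt_files = utils.download_weights(pt_filenames, model_id, revision)
        local_st_files = [
            # lstrip strips *characters*, not a prefix; it happens to work
            # for names like "pytorch_model-00001-of-00072" (kept as in the diff).
            p.parent / f"{p.stem.lstrip('pytorch_')}.safetensors"
            for p in local_pt_files
        ]
        utils.convert_files(local_pt_files, local_st_files)
        return local_st_files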
torch.set_grad_enabled(False) def get_model( model_id: str, revision: Optional[str], sharded: bool, quantize: bool ) -> Model: + if "facebook/galactica" in model_id: + if sharded: + return GalacticaSharded(model_id, revision, quantize=quantize) + else: + return Galactica(model_id, revision, quantize=quantize) + + if "santacoder" in model_id: + return SantaCoder(model_id, revision, quantize) + config = AutoConfig.from_pretrained(model_id, revision=revision) if config.model_type == "bloom": @@ -48,27 +56,22 @@ def get_model( return BLOOMSharded(model_id, revision, quantize=quantize) else: return BLOOM(model_id, revision, quantize=quantize) - elif config.model_type == "gpt_neox": + + if config.model_type == "gpt_neox": if sharded: return GPTNeoxSharded(model_id, revision, quantize=quantize) else: - return GPTNeox(model_id, revision, quantize=quantize) - elif config.model_type == "t5": + return CausalLM(model_id, revision, quantize=quantize) + + if config.model_type == "t5": if sharded: return T5Sharded(model_id, revision, quantize=quantize) else: return Seq2SeqLM(model_id, revision, quantize=quantize) - elif model_id.startswith("facebook/galactica"): - if sharded: - return GalacticaSharded(model_id, revision, quantize=quantize) - else: - return Galactica(model_id, revision, quantize=quantize) - elif "santacoder" in model_id: - return SantaCoder(model_id, revision, quantize) - else: - if sharded: - raise ValueError("sharded is not supported for AutoModel") - try: - return CausalLM(model_id, revision, quantize=quantize) - except Exception: - return Seq2SeqLM(model_id, revision, quantize=quantize) + + if sharded: + raise ValueError("sharded is not supported for AutoModel") + try: + return CausalLM(model_id, revision, quantize=quantize) + except Exception: + return Seq2SeqLM(model_id, revision, quantize=quantize) diff --git a/server/text_generation/models/bloom.py b/server/text_generation_server/models/bloom.py similarity index 93% rename from server/text_generation/models/bloom.py rename to server/text_generation_server/models/bloom.py index 992d7b5b..0d83abe2 100644 --- a/server/text_generation/models/bloom.py +++ b/server/text_generation_server/models/bloom.py @@ -17,13 +17,12 @@ from transformers.models.bloom.parallel_layers import ( TensorParallelRowLinear, ) -from text_generation.models import CausalLM -from text_generation.models.causal_lm import CausalLMBatch -from text_generation.pb import generate_pb2 -from text_generation.utils import ( +from text_generation_server.models import CausalLM +from text_generation_server.models.causal_lm import CausalLMBatch +from text_generation_server.pb import generate_pb2 +from text_generation_server.utils import ( initialize_torch_distributed, weight_files, - download_weights, ) HAS_BITS_AND_BYTES = True @@ -59,9 +58,6 @@ class BLOOMSharded(BLOOM): def __init__( self, model_id: str, revision: Optional[str] = None, quantize: bool = False ): - if not model_id.startswith("bigscience/bloom"): - raise ValueError(f"Model {model_id} is not supported") - self.process_group, self.rank, self.world_size = initialize_torch_distributed() self.master = self.rank == 0 if torch.cuda.is_available(): @@ -80,14 +76,8 @@ class BLOOMSharded(BLOOM): ) config.pad_token_id = 3 - # Only download weights for small models - if self.master and model_id == "bigscience/bloom-560m": - download_weights(model_id, revision=revision, extension=".safetensors") - torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, 
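The reworked `get_model` above checks the pure string matches (`facebook/galactica`, `santacoder`) before touching `AutoConfig`, since they need no config at all, and flattens the old elif-chain into early returns. One behavioral change worth noting: unsharded `gpt_neox` models now load through plain `CausalLM`, and the dedicated `GPTNeox` class is gone. The resulting dispatch order, summarized:

# Dispatch order after this change (sharded variant when sharded=True):
#   1. "facebook/galactica" in model_id  -> Galactica / GalacticaSharded
#   2. "santacoder" in model_id          -> SantaCoder
#   3. config.model_type == "bloom"      -> BLOOM / BLOOMSharded
#   4. config.model_type == "gpt_neox"   -> CausalLM / GPTNeoxSharded
#   5. config.model_type == "t5"         -> Seq2SeqLM / T5Sharded
#   6. anything else                     -> CausalLM, then Seq2SeqLM
#      (sharded=True raises ValueError for the fallback case)
from text_generation_server.models import get_model

model = get_model("bigscience/bloom-560m", revision=None, sharded=False, quantize=False)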
extension=".safetensors") - if not filenames: - raise ValueError("No safetensors weights found") with init_empty_weights(): model = AutoModelForCausalLM.from_config(config) diff --git a/server/text_generation/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py similarity index 80% rename from server/text_generation/models/causal_lm.py rename to server/text_generation_server/models/causal_lm.py index 1ac073b3..88ea6c75 100644 --- a/server/text_generation/models/causal_lm.py +++ b/server/text_generation_server/models/causal_lm.py @@ -5,10 +5,15 @@ from opentelemetry import trace from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedTokenizerBase from typing import Optional, Tuple, List, Type -from text_generation.models import Model -from text_generation.models.types import Batch, PrefillTokens, Generation, GeneratedText -from text_generation.pb import generate_pb2 -from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling +from text_generation_server.models import Model +from text_generation_server.models.types import ( + Batch, + PrefillTokens, + Generation, + GeneratedText, +) +from text_generation_server.pb import generate_pb2 +from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling tracer = trace.get_tracer(__name__) @@ -36,7 +41,8 @@ class CausalLMBatch(Batch): # Metadata used for padding size: int - max_sequence_length: int + max_input_length: int + padding_right_offset: int # Past metadata keys_head_dim_last: bool = True @@ -61,22 +67,36 @@ class CausalLMBatch(Batch): input_lengths = [] # Parse batch + padding_right_offset = 0 for r in pb.requests: inputs.append(r.inputs) - input_lengths.append(r.input_length) next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device)) - stopping_criterias.append( - StoppingCriteria.from_pb(r.stopping_parameters, tokenizer) + stopping_criteria = StoppingCriteria.from_pb( + r.stopping_parameters, tokenizer + ) + stopping_criterias.append(stopping_criteria) + padding_right_offset = max( + padding_right_offset, stopping_criteria.max_new_tokens ) - pad_to_multiple_of = 8 if device.type == "cuda" else None tokenized_inputs = tokenizer( inputs, return_tensors="pt", padding=True, - pad_to_multiple_of=pad_to_multiple_of, return_token_type_ids=False, ).to(device) + + input_lengths = tokenized_inputs["attention_mask"].sum(1) + max_input_length = input_lengths.max() + + input_ids = tokenized_inputs["input_ids"] + # Allocate maximum attention_mask + attention_mask = input_ids.new_zeros( + (pb.size, max_input_length + padding_right_offset) + ) + # Copy tokenizer attention_mask into fully allocated attention_mask + attention_mask[:, :max_input_length] = tokenized_inputs["attention_mask"] + position_ids = tokenized_inputs["attention_mask"].long().cumsum(-1) - 1 position_ids.masked_fill_(tokenized_inputs["attention_mask"] == 0, 1) all_input_ids = tokenized_inputs["input_ids"].unsqueeze(-1) @@ -84,24 +104,30 @@ class CausalLMBatch(Batch): return cls( batch_id=pb.id, requests=pb.requests, - input_ids=tokenized_inputs["input_ids"], - attention_mask=tokenized_inputs["attention_mask"], + input_ids=input_ids, + attention_mask=attention_mask, position_ids=position_ids, past_key_values=None, all_input_ids=all_input_ids, - input_lengths=input_lengths, + input_lengths=input_lengths.tolist(), next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, size=pb.size, - max_sequence_length=max(input_lengths), + max_input_length=max_input_length.item(), + 
padding_right_offset=padding_right_offset, ) @classmethod @tracer.start_as_current_span("concatenate") def concatenate(cls, batches: List["CausalLMBatch"]) -> "CausalLMBatch": # Used for padding - total_batch_size = sum(batch.size for batch in batches) - max_sequence_length = max(batch.max_sequence_length for batch in batches) + total_batch_size = 0 + max_input_length = 0 + padding_right_offset = 0 + for batch in batches: + total_batch_size += batch.size + max_input_length = max(max_input_length, batch.max_input_length) + padding_right_offset = max(padding_right_offset, batch.padding_right_offset) # Batch attributes requests = [] @@ -144,13 +170,24 @@ class CausalLMBatch(Batch): # Create padded tensor if attention_mask is None: attention_mask = batch.attention_mask.new_zeros( - (total_batch_size, max_sequence_length), + (total_batch_size, max_input_length + padding_right_offset), ) # We need to slice the attention mask to remove padding from previous steps + # and to remove unused allocated space + left_offset = max_input_length - batch.max_input_length + batch_left_offset = ( + batch.attention_mask.shape[1] + - batch.max_input_length + - batch.padding_right_offset + ) attention_mask[ - start_index:end_index, -batch.max_sequence_length : - ] = batch.attention_mask[:, -batch.max_sequence_length :] + start_index:end_index, + left_offset:-padding_right_offset, + ] = batch.attention_mask[ + :, + batch_left_offset : -batch.padding_right_offset, + ] # Create empty tensor # position_ids is always of shape [batch_size, 1] @@ -172,7 +209,7 @@ class CausalLMBatch(Batch): padded_past_values_shape = ( total_batch_size, num_heads, - max_sequence_length - 1, + max_input_length - 1, head_dim, ) @@ -184,7 +221,7 @@ class CausalLMBatch(Batch): total_batch_size, num_heads, head_dim, - max_sequence_length - 1, + max_input_length - 1, ) # This will run only once per layer @@ -198,20 +235,20 @@ class CausalLMBatch(Batch): past_key_values[j][0][ start_index:end_index, :, - -(batch.max_sequence_length - 1) :, + -(batch.max_input_length - 1) :, :, - ] = past_keys[:, :, -(batch.max_sequence_length - 1) :, :] + ] = past_keys[:, :, -(batch.max_input_length - 1) :, :] else: past_key_values[j][0][ start_index:end_index, :, :, - -(batch.max_sequence_length - 1) :, - ] = past_keys[:, :, :, -(batch.max_sequence_length - 1) :] + -(batch.max_input_length - 1) :, + ] = past_keys[:, :, :, -(batch.max_input_length - 1) :] past_key_values[j][1][ - start_index:end_index, :, -(batch.max_sequence_length - 1) :, : - ] = past_values[:, :, -(batch.max_sequence_length - 1) :, :] + start_index:end_index, :, -(batch.max_input_length - 1) :, : + ] = past_values[:, :, -(batch.max_input_length - 1) :, :] start_index += batch.size @@ -227,7 +264,8 @@ class CausalLMBatch(Batch): next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, size=total_batch_size, - max_sequence_length=max_sequence_length, + max_input_length=max_input_length, + padding_right_offset=padding_right_offset, keys_head_dim_last=batches[0].keys_head_dim_last, ) @@ -294,9 +332,12 @@ class CausalLM(Model): def generate_token( self, batch: CausalLMBatch ) -> Tuple[List[Generation], Optional[CausalLMBatch]]: + # slice the attention mask to the correct shape + attention_mask = batch.attention_mask[:, : -batch.padding_right_offset] + logits, past = self.forward( batch.input_ids, - batch.attention_mask, + attention_mask, batch.position_ids, batch.past_key_values, ) @@ -311,7 +352,7 @@ class CausalLM(Model): # Metadata next_batch_size = 0 - 
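# Worked example (toy numbers, variable names mine) of the offset arithmetic in
# `concatenate` above. A merged mask has max_input_length + padding_right_offset
# columns; each source batch contributes a window of batch.max_input_length live
# columns, so both destination and source are trimmed before the copy.
max_input_length = 5            # max over all batches being merged
padding_right_offset = 3        # max over all batches
batch_max_input_length = 3      # one incoming batch
batch_padding_right_offset = 2
batch_mask_width = 7            # columns that batch allocated originally

left_offset = max_input_length - batch_max_input_length                     # 2
batch_left_offset = (
    batch_mask_width - batch_max_input_length - batch_padding_right_offset  # 2
)
# Destination slice [2:-3] and source slice [2:-2] are both 3 columns wide:
dest_width = (max_input_length + padding_right_offset - padding_right_offset) - left_offset
src_width = (batch_mask_width - batch_padding_right_offset) - batch_left_offset
assert dest_width == src_width == batch_max_input_length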
next_batch_max_sequence_length = 0 + next_batch_max_input_length = 0 # Results generations: List[Generation] = [] @@ -347,10 +388,8 @@ class CausalLM(Model): # Generated token next_token_logprob = logprobs[-1, next_token_id] next_token_id_squeezed = next_token_id.squeeze() - next_token_text = self.tokenizer.decode( + next_token_text = self.decode_token( next_token_id_squeezed, - clean_up_tokenization_spaces=False, - skip_special_tokens=False, ) # Evaluate stopping criteria @@ -381,8 +420,8 @@ class CausalLM(Model): next_batch_all_input_ids.append(all_input_ids) next_batch_size += 1 next_batch_input_lengths.append(new_input_length) - next_batch_max_sequence_length = max( - next_batch_max_sequence_length, new_input_length + next_batch_max_input_length = max( + next_batch_max_input_length, new_input_length ) # Prefill @@ -409,6 +448,7 @@ class CausalLM(Model): next_token_id_squeezed, next_token_logprob, next_token_text, + next_token_id_squeezed.item() in self.all_special_ids, generated_text, ) @@ -448,14 +488,8 @@ class CausalLM(Model): next_batch_next_token_choosers = batch.next_token_choosers next_batch_stopping_criterias = batch.stopping_criterias - # Update attention_mask with padding as we added a new token to input_ids - next_batch_attention_mask = torch.cat( - [ - next_batch_attention_mask, - next_batch_attention_mask.new_ones(next_batch_size, 1), - ], - dim=1, - ) + # Update attention_mask as we added a new token to input_ids + next_batch_attention_mask[:, -batch.padding_right_offset] = 1 # Update position_ids next_batch_position_ids = next_batch_position_ids[:, -1:] + 1 @@ -472,7 +506,8 @@ class CausalLM(Model): next_token_choosers=next_batch_next_token_choosers, stopping_criterias=next_batch_stopping_criterias, size=next_batch_size, - max_sequence_length=next_batch_max_sequence_length, + max_input_length=next_batch_max_input_length, + padding_right_offset=batch.padding_right_offset - 1, keys_head_dim_last=batch.keys_head_dim_last, ) return generations, next_batch diff --git a/server/text_generation/models/galactica.py b/server/text_generation_server/models/galactica.py similarity index 87% rename from server/text_generation/models/galactica.py rename to server/text_generation_server/models/galactica.py index f1dc8a30..a90a299e 100644 --- a/server/text_generation/models/galactica.py +++ b/server/text_generation_server/models/galactica.py @@ -2,7 +2,7 @@ import re import torch import torch.distributed -from typing import List, Optional, Type +from typing import List, Optional, Type, Tuple from accelerate import init_empty_weights from safetensors import safe_open @@ -18,15 +18,14 @@ from transformers.models.opt.parallel_layers import ( TensorParallelRowLinear, ) -from text_generation.models import CausalLM -from text_generation.pb import generate_pb2 -from text_generation.models.causal_lm import CausalLMBatch -from text_generation.utils import ( +from text_generation_server.models import CausalLM +from text_generation_server.pb import generate_pb2 +from text_generation_server.models.causal_lm import CausalLMBatch +from text_generation_server.utils import ( NextTokenChooser, StoppingCriteria, initialize_torch_distributed, weight_files, - download_weights, ) HAS_BITS_AND_BYTES = True @@ -97,24 +96,37 @@ class GalacticaCausalLMBatch(CausalLMBatch): input_lengths = [] # Parse batch + max_sequence_length = 0 + padding_right_offset = 0 for r in pb.requests: # Add escape_custom_split_sequence to the CausalLMBatch logic inputs.append(escape_custom_split_sequence(r.inputs)) 
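# Illustration (toy shapes, assuming the left padding that the mask copy above
# implies) of the mask lifecycle this change introduces: allocate once with room
# for every future token, slice before each forward pass, then flip a single
# zero in place instead of torch.cat-ing a new column on every step.
import torch

max_input_length = 3       # longest prompt in the batch
padding_right_offset = 2   # max(max_new_tokens) over the batch

attention_mask = torch.zeros(1, max_input_length + padding_right_offset, dtype=torch.long)
attention_mask[:, 1:max_input_length] = 1     # a length-2 prompt, left-padded to 3
print(attention_mask)                         # tensor([[0, 1, 1, 0, 0]])

step_mask = attention_mask[:, :-padding_right_offset]  # what the forward pass sees
print(step_mask)                              # tensor([[0, 1, 1]])

attention_mask[:, -padding_right_offset] = 1  # mark the token generated this step
padding_right_offset -= 1                     # one reserved slot consumed
print(attention_mask)                         # tensor([[0, 1, 1, 1, 0]])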
input_lengths.append(r.input_length) next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device)) - stopping_criterias.append( - StoppingCriteria.from_pb(r.stopping_parameters, tokenizer + stopping_criteria = StoppingCriteria.from_pb( + r.stopping_parameters, tokenizer + ) + stopping_criterias.append(stopping_criteria) + max_sequence_length = max(max_sequence_length, r.input_length) + padding_right_offset = max( + padding_right_offset, stopping_criteria.max_new_tokens ) # Tokenize batch - pad_to_multiple_of = 8 if device.type == "cuda" else None tokenized_inputs = tokenizer( inputs, return_tensors="pt", padding=True, - pad_to_multiple_of=pad_to_multiple_of, return_token_type_ids=False, ).to(device) + input_ids = tokenized_inputs["input_ids"] + # Allocate maximum attention_mask + attention_mask = input_ids.new_zeros( + (pb.size, max_sequence_length + padding_right_offset) + ) + # Copy tokenizer attention_mask into fully allocated attention_mask + attention_mask[:, :max_sequence_length] = tokenized_inputs["attention_mask"] + position_ids = tokenized_inputs["attention_mask"].long().cumsum(-1) - 1 position_ids.masked_fill_(tokenized_inputs["attention_mask"] == 0, 1) all_input_ids = tokenized_inputs["input_ids"].unsqueeze(-1) @@ -122,8 +134,8 @@ class GalacticaCausalLMBatch(CausalLMBatch): return cls( batch_id=pb.id, requests=pb.requests, - input_ids=tokenized_inputs["input_ids"], - attention_mask=tokenized_inputs["attention_mask"], + input_ids=input_ids, + attention_mask=attention_mask, position_ids=position_ids, past_key_values=None, all_input_ids=all_input_ids, @@ -131,7 +143,8 @@ class GalacticaCausalLMBatch(CausalLMBatch): next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, size=pb.size, - max_sequence_length=max(input_lengths), + max_sequence_length=max_sequence_length, + padding_right_offset=padding_right_offset, ) @@ -146,14 +159,25 @@ class Galactica(CausalLM): generated_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False )
+    def forward(
+        self, input_ids, attention_mask, position_ids, past_key_values: Optional = None
+    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
+        """Overwrite forward to ignore position_ids"""
+
+        # Model Forward
+        outputs = self.model.forward(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            past_key_values=past_key_values,
+            use_cache=True,
+        )
+        return outputs.logits, outputs.past_key_values
+
 class GalacticaSharded(Galactica): def __init__( self, model_id: str, revision: Optional[str] = None, quantize: bool = False ): - if not model_id.startswith("facebook/galactica"): - raise ValueError(f"Model {model_id} is not supported") - self.process_group, self.rank, self.world_size = initialize_torch_distributed() self.master = self.rank == 0 if torch.cuda.is_available(): @@ -172,14 +196,8 @@ class GalacticaSharded(Galactica): ) tokenizer.pad_token_id = config.pad_token_id - # Only download weights for small models - if self.master and model_id == "facebook/galactica-125m": - download_weights(model_id, revision=revision, extension=".safetensors") - torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - if not filenames: - raise ValueError("No safetensors weights found") with init_empty_weights(): model = AutoModelForCausalLM.from_config(config) @@ -329,7 +347,6 @@ class GalacticaSharded(Galactica): outputs = self.model.forward( input_ids=input_ids, attention_mask=attention_mask, - position_ids, past_key_values=past_key_values, use_cache=True, ) diff --git a/server/text_generation/models/gpt_neox.py b/server/text_generation_server/models/gpt_neox.py similarity index 90% rename from server/text_generation/models/gpt_neox.py rename to server/text_generation_server/models/gpt_neox.py index 2d467f4c..8fabefe3 100644 --- a/server/text_generation/models/gpt_neox.py +++ b/server/text_generation_server/models/gpt_neox.py @@ -1,7 +1,7 @@ import torch import torch.distributed -from typing import List, Optional, Tuple +from typing import List, Optional from accelerate import init_empty_weights from safetensors import safe_open @@ -16,11 +16,10 @@ from transformers.models.gpt_neox.parallel_layers import ( TensorParallelRowLinear, ) -from text_generation.models import CausalLM -from text_generation.utils import ( +from text_generation_server.models import CausalLM +from text_generation_server.utils import ( initialize_torch_distributed, weight_files, - download_weights, ) HAS_BITS_AND_BYTES = True @@ -31,23 +30,7 @@ except Exception as e: HAS_BITS_AND_BYTES = False -class GPTNeox(CausalLM): - def forward( - self, input_ids, attention_mask, position_ids, past_key_values: Optional = None - ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]: - """Overwrite forward to ignore position_ids""" - - # Model Forward - outputs = self.model.forward( - input_ids=input_ids, - attention_mask=attention_mask, - past_key_values=past_key_values, - use_cache=True, - ) - return outputs.logits, outputs.past_key_values - - -class GPTNeoxSharded(GPTNeox): +class GPTNeoxSharded(CausalLM): def __init__( self, model_id: str, revision: Optional[str] = None, quantize: bool = False ): @@ -69,14 +52,8 @@ class GPTNeoxSharded(GPTNeox): model_id, revision=revision, tp_parallel=True ) - # Only master download weights - if self.master: - download_weights(model_id, revision=revision, extension=".safetensors") - torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - if not filenames: - raise ValueError("No safetensors weights found") with init_empty_weights(): model = AutoModelForCausalLM.from_config(config) @@ -231,6 +208,7 @@ class GPTNeoxSharded(GPTNeox): outputs = self.model.forward( input_ids=input_ids, attention_mask=attention_mask, + position_ids=position_ids, past_key_values=past_key_values, use_cache=True, ) diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py new file mode 100644 index 00000000..e0ce6686 --- /dev/null +++ b/server/text_generation_server/models/model.py @@ -0,0 +1,43 @@
+import torch
+
+from abc import ABC, abstractmethod
+from typing import List, Tuple, Optional, TypeVar, Type
+from transformers import PreTrainedTokenizerBase
+
+from text_generation_server.models.types import Batch, GeneratedText
+
+B = TypeVar("B", bound=Batch)
+
+
+class Model(ABC):
+    def __init__(self, tokenizer: PreTrainedTokenizerBase, device: torch.device):
+        self.tokenizer = tokenizer
+        self.all_special_ids = set(tokenizer.all_special_ids)
+        self.device = device
+
+        # see `decode_token` method
+        self.tokenizer.add_special_tokens(
+            {"additional_special_tokens": ["<decode-token>"]}
+        )
+        self.special_decode_token_id = self.tokenizer.convert_tokens_to_ids(
+            "<decode-token>"
+        )
+        self.special_decode_token_length = len("<decode-token>")
+
+    @property
+    @abstractmethod
+    def batch_type(self) -> Type[B]:
+        raise NotImplementedError
+
+    @abstractmethod
+    def generate_token(self, batch: B) -> Tuple[List[GeneratedText], Optional[B]]:
+        raise NotImplementedError
+
+    def decode_token(self, token_id: int) -> str:
+        """Hack to hopefully support generate_stream for the maximum number of tokenizers"""
+        # append token to special decode token and decode both
+        result = self.tokenizer.decode(
+            [self.special_decode_token_id, token_id], skip_special_tokens=False
+        )
+        # slice to remove special decode token
+        return result[self.special_decode_token_length :] diff --git a/server/text_generation/models/santacoder.py b/server/text_generation_server/models/santacoder.py similarity index 95% rename from server/text_generation/models/santacoder.py rename to server/text_generation_server/models/santacoder.py index fb496197..fe15cde0 100644 --- a/server/text_generation/models/santacoder.py +++ b/server/text_generation_server/models/santacoder.py @@ -1,10 +1,10 @@ import torch import torch.distributed -from typing import Optional, List, Tuple +from typing import Optional, List from transformers import AutoTokenizer, AutoModelForCausalLM -from text_generation.models import CausalLM +from text_generation_server.models import CausalLM FIM_PREFIX = "<fim-prefix>" FIM_MIDDLE = "<fim-middle>" diff --git a/server/text_generation/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py similarity index 82% rename from server/text_generation/models/seq2seq_lm.py rename to server/text_generation_server/models/seq2seq_lm.py index 2f28c4ce..0fe5c03f 100644 --- a/server/text_generation/models/seq2seq_lm.py +++ b/server/text_generation_server/models/seq2seq_lm.py @@ -5,10 +5,15 @@ from opentelemetry import trace from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PreTrainedTokenizerBase from typing import Optional, Tuple, List, Type -from text_generation.models import Model -from text_generation.models.types import GeneratedText, Batch, Generation, PrefillTokens -from text_generation.pb import generate_pb2 -from text_generation.utils import NextTokenChooser, StoppingCriteria, Sampling +from text_generation_server.models import Model +from text_generation_server.models.types import ( + GeneratedText, + Batch, + Generation, + PrefillTokens, +) +from text_generation_server.pb import generate_pb2 +from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling tracer = trace.get_tracer(__name__) @@ -42,9 +47,10 @@ class Seq2SeqLMBatch(Batch): size: int max_input_length: int max_decoder_input_length: int + padding_right_offset: int def to_pb(self) -> generate_pb2.Batch: - """Convert a Seq2SeqLMBatch to a text_generation.v1.Batch protobuf""" + """Convert a Seq2SeqLMBatch to a text_generation_server.v1.Batch protobuf""" return generate_pb2.Batch( id=self.batch_id, requests=self.requests, @@ -58,36 +64,41 @@ tokenizer: PreTrainedTokenizerBase, device: torch.device, ) -> "Seq2SeqLMBatch": - """Convert a text_generation.v1.Batch protobuf to a Seq2SeqLMBatch""" + """Convert a text_generation_server.v1.Batch protobuf to a Seq2SeqLMBatch""" inputs = [] next_token_choosers = [] stopping_criterias = [] - input_lengths = [] decoder_input_ids = [] decoder_input_lengths = [] # Parse batch + padding_right_offset = 0 for r in pb.requests: inputs.append(r.inputs) - input_lengths.append(r.input_length) # Decoder sequence only contains the bos_token decoder_input_ids.append(tokenizer.bos_token_id) decoder_input_lengths.append(1) next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device)) - stopping_criterias.append( - StoppingCriteria.from_pb(r.stopping_parameters, tokenizer +
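# Why `decode_token` decodes behind a sentinel (sketch; any Hub tokenizer works,
# gpt2 is only an example): decoding a single id in isolation can drop the
# leading-space information that only materialises when the id follows other
# text, which corrupts token-by-token streaming output.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({"additional_special_tokens": ["<decode-token>"]})
sentinel_id = tokenizer.convert_tokens_to_ids("<decode-token>")
sentinel_len = len("<decode-token>")

token_id = tokenizer(" world")["input_ids"][0]
alone = tokenizer.decode([token_id])
behind = tokenizer.decode([sentinel_id, token_id], skip_special_tokens=False)[sentinel_len:]
print(repr(alone), repr(behind))  # identical for GPT-2; SentencePiece models diverge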
stopping_criteria = StoppingCriteria.from_pb( + r.stopping_parameters, tokenizer + ) + stopping_criterias.append(stopping_criteria) + padding_right_offset = max( + padding_right_offset, stopping_criteria.max_new_tokens ) # Tokenize batch - pad_to_multiple_of = 8 if device.type == "cuda" else None tokenized_inputs = tokenizer( inputs, return_tensors="pt", padding=True, - pad_to_multiple_of=pad_to_multiple_of, return_token_type_ids=False, ).to(device) + + input_lengths = tokenized_inputs["attention_mask"].sum(1) + max_input_length = input_lengths.max() + # Convert decoder_input_ids to torch tensor of size [batch_size, 1] decoder_input_ids = torch.tensor(decoder_input_ids, device=device).unsqueeze(-1) @@ -100,13 +111,14 @@ class Seq2SeqLMBatch(Batch): decoder_attention_mask=None, encoder_last_hidden_state=None, past_key_values=None, - input_lengths=input_lengths, + input_lengths=input_lengths.tolist(), decoder_input_lengths=decoder_input_lengths, next_token_choosers=next_token_choosers, stopping_criterias=stopping_criterias, size=len(pb.requests), - max_input_length=max(input_lengths), + max_input_length=max_input_length.item(), max_decoder_input_length=1, + padding_right_offset=padding_right_offset, ) @classmethod @@ -115,11 +127,17 @@ class Seq2SeqLMBatch(Batch): """Concatenate multiple batches together by padding internal torch tensors""" # Used for padding - total_batch_size = sum(batch.size for batch in batches) - max_input_length = max(batch.max_input_length for batch in batches) - max_decoder_input_length = max( - batch.max_decoder_input_length for batch in batches - ) + total_batch_size = 0 + max_input_length = 0 + max_decoder_input_length = 0 + padding_right_offset = 0 + for batch in batches: + total_batch_size += batch.size + max_input_length = max(max_input_length, batch.max_input_length) + max_decoder_input_length = max( + max_decoder_input_length, batch.max_decoder_input_length + ) + padding_right_offset = max(padding_right_offset, batch.padding_right_offset) # Batch attributes requests = [] @@ -129,7 +147,6 @@ class Seq2SeqLMBatch(Batch): stopping_criterias = [] # Batch tensors - input_ids = None attention_mask = None decoder_input_ids = None decoder_attention_mask = None @@ -155,16 +172,6 @@ class Seq2SeqLMBatch(Batch): if batch.encoder_last_hidden_state is None: raise ValueError("Batch encoder_last_hidden_state cannot be None") - # Create padded tensor - if input_ids is None: - input_ids = batch.input_ids.new_zeros( - (total_batch_size, max_input_length), - ) - # Copy to correct indices - input_ids[ - start_index:end_index, -batch.max_input_length : - ] = batch.input_ids[:, -batch.max_input_length :] - # Create padded tensor if attention_mask is None: attention_mask = batch.attention_mask.new_zeros( @@ -189,19 +196,30 @@ class Seq2SeqLMBatch(Batch): if decoder_attention_mask is None: # As decoder_attention_mask might not exist, we use `batch.attention_mask` for device here decoder_attention_mask = batch.attention_mask.new_zeros( - (total_batch_size, max_decoder_input_length), + (total_batch_size, max_decoder_input_length + padding_right_offset), ) # If the decoder mask does not exist yet, all generations started at the same time and we never concatenated # this batch. All generations are of length `batch.max_decoder_input_length`. 
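# Small sketch (values invented) of the metadata this `from_pb` now derives:
# encoder lengths come from the tokenizer's attention mask rather than the
# request's input_length field, and the decoder side starts from one bos token.
import torch

tokenizer_attention_mask = torch.tensor([[1, 1, 1], [0, 1, 1]])  # two prompts
input_lengths = tokenizer_attention_mask.sum(1)                  # tensor([3, 2])
max_input_length = input_lengths.max().item()                    # 3

padding_right_offset = max([20, 8])  # max_new_tokens of each request's criteria

bos_token_id = 0  # taken from the tokenizer in the real code
decoder_input_ids = torch.tensor([bos_token_id, bos_token_id]).unsqueeze(-1)  # [2, 1]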
+ left_offset = max_decoder_input_length - batch.max_decoder_input_length if batch.decoder_attention_mask is None: decoder_attention_mask[ - start_index:end_index, -batch.max_decoder_input_length : + start_index:end_index, + left_offset:-padding_right_offset, ] = 1 # If it exists, we need to index else: + batch_left_offset = ( + batch.decoder_attention_mask.shape[1] + - batch.max_decoder_input_length + - batch.padding_right_offset + ) decoder_attention_mask[ - start_index:end_index, -batch.max_decoder_input_length : - ] = batch.decoder_attention_mask[:, -batch.max_decoder_input_length :] + start_index:end_index, + left_offset:-padding_right_offset, + ] = batch.decoder_attention_mask[ + :, + batch_left_offset : -batch.padding_right_offset, + ] # Create padded tensor if encoder_last_hidden_state is None: @@ -273,7 +291,7 @@ class Seq2SeqLMBatch(Batch): return cls( batch_id=batches[0].batch_id, requests=requests, - input_ids=input_ids, + input_ids=None, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, @@ -286,6 +304,7 @@ class Seq2SeqLMBatch(Batch): size=total_batch_size, max_input_length=max_input_length, max_decoder_input_length=max_decoder_input_length, + padding_right_offset=padding_right_offset, ) def __len__(self): @@ -326,7 +345,9 @@ class Seq2SeqLM(Model): return Seq2SeqLMBatch def decode(self, decoder_ids: List[int]) -> str: - return self.tokenizer.decode(decoder_ids, skip_special_tokens=True) + return self.tokenizer.decode( + decoder_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False + ) def forward( self, @@ -342,14 +363,6 @@ class Seq2SeqLM(Model): List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]], ]: # Model Forward - if past_key_values is not None: - decoder_input_ids = decoder_input_ids[:, -1].unsqueeze(-1) - - # Wrap `encoder_last_hidden_state` because for some reason, Transformers does a `encoder_last_hidden_state[0]` - # internally... - if encoder_last_hidden_state is not None: - encoder_last_hidden_state = [encoder_last_hidden_state] - outputs = self.model.forward( input_ids=input_ids, attention_mask=attention_mask, @@ -369,12 +382,34 @@ class Seq2SeqLM(Model): def generate_token( self, batch: Seq2SeqLMBatch ) -> Tuple[List[Generation], Optional[Seq2SeqLMBatch]]: + if batch.decoder_attention_mask is not None: + # slice to the correct shape + decoder_attention_mask = batch.decoder_attention_mask[ + :, : -batch.padding_right_offset + ] + else: + decoder_attention_mask = None + + # check if first forward or not + if batch.past_key_values is not None: + # Only take the last token + decoder_input_ids = batch.decoder_input_ids[:, -1].unsqueeze(-1) + else: + decoder_input_ids = batch.decoder_input_ids + + # Wrap `encoder_last_hidden_state` because for some reason, Transformers does a `encoder_last_hidden_state[0]` + # internally... 
+ if batch.encoder_last_hidden_state is not None: + encoder_last_hidden_state = [batch.encoder_last_hidden_state] + else: + encoder_last_hidden_state = batch.encoder_last_hidden_state + logits, encoder_last_hidden_state, past = self.forward( batch.input_ids, batch.attention_mask, - batch.decoder_input_ids, - batch.decoder_attention_mask, - batch.encoder_last_hidden_state, + decoder_input_ids, + decoder_attention_mask, + encoder_last_hidden_state, batch.past_key_values, ) @@ -402,7 +437,6 @@ class Seq2SeqLM(Model): logits, batch.next_token_choosers, batch.stopping_criterias, - batch.input_ids, batch.decoder_input_ids, ) @@ -414,7 +448,6 @@ class Seq2SeqLM(Model): logits, next_token_chooser, stopping_criteria, - input_tokens, decoder_input_ids, ) in enumerate(iterator): # Select next token @@ -429,10 +462,8 @@ class Seq2SeqLM(Model): # Generated token next_token_logprob = logprobs[-1, next_token_id] next_token_id_squeezed = next_token_id.squeeze() - next_token_text = self.tokenizer.decode( + next_token_text = self.decode_token( next_token_id_squeezed, - clean_up_tokenization_spaces=False, - skip_special_tokens=False, ) # Evaluate stopping criteria @@ -469,14 +500,10 @@ class Seq2SeqLM(Model): # Prefill if stopping_criteria.current_tokens == 1: - prefill_token_ids = decoder_input_ids[-new_decoder_input_length:-1] - prefill_texts = self.tokenizer.batch_decode( - prefill_token_ids, - clean_up_tokenization_spaces=False, - skip_special_tokens=False, - ) prefill_tokens = PrefillTokens( - prefill_token_ids, [float("nan")], prefill_texts + [self.tokenizer.bos_token_id], + [float("nan")], + [self.tokenizer.bos_token], ) else: prefill_tokens = None @@ -487,6 +514,7 @@ class Seq2SeqLM(Model): next_token_id_squeezed, next_token_logprob, next_token_text, + next_token_id_squeezed.item() in self.all_special_ids, generated_text, ) @@ -500,10 +528,8 @@ class Seq2SeqLM(Model): # If we finished at least one generation, we need to evict the indices of the generations that finished # from the values of the next batch if len(next_batch_keep_indices) != len(batch): - # Apply indices to attention mask, past key values and other items that need to be cached - next_batch_input_ids = batch.input_ids[next_batch_keep_indices] + # Apply indices to decoder_attention mask, past key values and other items that need to be cached next_batch_attention_mask = batch.attention_mask[next_batch_keep_indices] - if batch.decoder_attention_mask is not None: next_batch_decoder_attention_mask = batch.decoder_attention_mask[ next_batch_keep_indices @@ -526,7 +552,6 @@ class Seq2SeqLM(Model): batch.stopping_criterias[i] for i in next_batch_keep_indices ] else: - next_batch_input_ids = batch.input_ids next_batch_attention_mask = batch.attention_mask next_batch_decoder_attention_mask = batch.decoder_attention_mask next_batch_encoder_last_hidden_state = encoder_last_hidden_state @@ -536,20 +561,14 @@ class Seq2SeqLM(Model): next_batch_next_token_choosers = batch.next_token_choosers next_batch_stopping_criterias = batch.stopping_criterias - # Update decoder_attention_mask with padding as we added a new token to input_ids + # Update decoder_attention_mask as we added a new token to input_ids if next_batch_decoder_attention_mask is not None: - next_batch_decoder_attention_mask = torch.cat( - [ - next_batch_decoder_attention_mask, - next_batch_decoder_attention_mask.new_ones(next_batch_size, 1), - ], - dim=1, - ) + next_batch_decoder_attention_mask[:, -batch.padding_right_offset] = 1 next_batch = Seq2SeqLMBatch( batch_id=batch.batch_id, 
requests=next_batch_requests, - input_ids=next_batch_input_ids, + input_ids=None, attention_mask=next_batch_attention_mask, decoder_input_ids=next_batch_decoder_input_ids, decoder_attention_mask=next_batch_decoder_attention_mask, @@ -562,5 +581,6 @@ class Seq2SeqLM(Model): size=next_batch_size, max_input_length=next_batch_max_input_length, max_decoder_input_length=next_batch_max_decoder_input_length, + padding_right_offset=batch.padding_right_offset - 1, ) return generations, next_batch diff --git a/server/text_generation/models/t5.py b/server/text_generation_server/models/t5.py similarity index 92% rename from server/text_generation/models/t5.py rename to server/text_generation_server/models/t5.py index d7241c81..cb4f7f22 100644 --- a/server/text_generation/models/t5.py +++ b/server/text_generation_server/models/t5.py @@ -16,11 +16,10 @@ from transformers.models.t5.parallel_layers import ( TensorParallelRowLinear, ) -from text_generation.models import Seq2SeqLM -from text_generation.utils import ( +from text_generation_server.models import Seq2SeqLM +from text_generation_server.utils import ( initialize_torch_distributed, weight_files, - download_weights, ) HAS_BITS_AND_BYTES = True @@ -53,14 +52,8 @@ class T5Sharded(Seq2SeqLM): ) tokenizer.bos_token_id = config.decoder_start_token_id - # Only master download weights - if self.master: - download_weights(model_id, revision=revision, extension=".safetensors") - torch.distributed.barrier(group=self.process_group) filenames = weight_files(model_id, revision=revision, extension=".safetensors") - if not filenames: - raise ValueError("No safetensors weights found") with init_empty_weights(): model = AutoModelForSeq2SeqLM.from_config(config) @@ -228,14 +221,6 @@ class T5Sharded(Seq2SeqLM): List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]], ]: # Model Forward - if past_key_values is not None: - decoder_input_ids = decoder_input_ids[:, -1].unsqueeze(-1) - - # Wrap `encoder_last_hidden_state` because for some reason, Transformers does a `encoder_last_hidden_state[0]` - # internally... 
- if encoder_last_hidden_state is not None: - encoder_last_hidden_state = [encoder_last_hidden_state] - outputs = self.model.forward( input_ids=input_ids, attention_mask=attention_mask, diff --git a/server/text_generation/models/types.py b/server/text_generation_server/models/types.py similarity index 91% rename from server/text_generation/models/types.py rename to server/text_generation_server/models/types.py index d1117b80..93c3b9db 100644 --- a/server/text_generation/models/types.py +++ b/server/text_generation_server/models/types.py @@ -6,8 +6,8 @@ from typing import List, Optional from transformers import PreTrainedTokenizerBase -from text_generation.pb import generate_pb2 -from text_generation.pb.generate_pb2 import FinishReason +from text_generation_server.pb import generate_pb2 +from text_generation_server.pb.generate_pb2 import FinishReason class Batch(ABC): @@ -73,6 +73,7 @@ class Generation: token_id: int token_logprob: float token_text: str + token_is_special: bool generated_text: Optional[GeneratedText] def to_pb(self) -> generate_pb2.Generation: @@ -84,6 +85,7 @@ class Generation: token_id=self.token_id, token_logprob=self.token_logprob, token_text=self.token_text, + token_is_special=self.token_is_special, generated_text=self.generated_text.to_pb() if self.generated_text is not None else None, diff --git a/server/text_generation/pb/.gitignore b/server/text_generation_server/pb/.gitignore similarity index 100% rename from server/text_generation/pb/.gitignore rename to server/text_generation_server/pb/.gitignore diff --git a/server/text_generation/server.py b/server/text_generation_server/server.py similarity index 92% rename from server/text_generation/server.py rename to server/text_generation_server/server.py index f3129cb4..0b75c3c7 100644 --- a/server/text_generation/server.py +++ b/server/text_generation_server/server.py @@ -9,11 +9,11 @@ from grpc_reflection.v1alpha import reflection from pathlib import Path from typing import List, Optional -from text_generation.cache import Cache -from text_generation.interceptor import ExceptionInterceptor -from text_generation.models import Model, get_model -from text_generation.pb import generate_pb2_grpc, generate_pb2 -from text_generation.tracing import UDSOpenTelemetryAioServerInterceptor +from text_generation_server.cache import Cache +from text_generation_server.interceptor import ExceptionInterceptor +from text_generation_server.models import Model, get_model +from text_generation_server.pb import generate_pb2_grpc, generate_pb2 +from text_generation_server.tracing import UDSOpenTelemetryAioServerInterceptor class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer): diff --git a/server/text_generation/tracing.py b/server/text_generation_server/tracing.py similarity index 100% rename from server/text_generation/tracing.py rename to server/text_generation_server/tracing.py diff --git a/server/text_generation_server/utils/__init__.py b/server/text_generation_server/utils/__init__.py new file mode 100644 index 00000000..50d64518 --- /dev/null +++ b/server/text_generation_server/utils/__init__.py @@ -0,0 +1,36 @@ +from text_generation_server.utils.convert import convert_file, convert_files +from text_generation_server.utils.dist import initialize_torch_distributed +from text_generation_server.utils.hub import ( + weight_files, + weight_hub_files, + download_weights, + EntryNotFoundError, + LocalEntryNotFoundError, + RevisionNotFoundError, +) +from text_generation_server.utils.tokens import ( + Greedy, + 
NextTokenChooser, + Sampling, + StoppingCriteria, + StopSequenceCriteria, + FinishReason, +) + +__all__ = [ + "convert_file", + "convert_files", + "initialize_torch_distributed", + "weight_files", + "weight_hub_files", + "download_weights", + "EntryNotFoundError", + "LocalEntryNotFoundError", + "RevisionNotFoundError", + "Greedy", + "NextTokenChooser", + "Sampling", + "StoppingCriteria", + "StopSequenceCriteria", + "FinishReason", +] diff --git a/server/text_generation_server/utils/convert.py b/server/text_generation_server/utils/convert.py new file mode 100644 index 00000000..437e2308 --- /dev/null +++ b/server/text_generation_server/utils/convert.py @@ -0,0 +1,94 @@
+import concurrent
+import time
+import torch
+
+from concurrent.futures import ThreadPoolExecutor
+from collections import defaultdict
+from datetime import timedelta
+from loguru import logger
+from pathlib import Path
+from safetensors.torch import load_file, save_file
+from typing import Dict, List
+
+
+def check_file_size(source_file: Path, target_file: Path):
+    """
+    Check that two files are close in size
+    """
+    source_file_size = source_file.stat().st_size
+    target_file_size = target_file.stat().st_size
+
+    if (source_file_size - target_file_size) / source_file_size > 0.01:
+        raise RuntimeError(
+            f"""The file size difference is more than 1%:
+         - {source_file}: {source_file_size}
+         - {target_file}: {target_file_size}
+         """
+        )
+
+
+def remove_shared_pointers(tensors: Dict[str, torch.Tensor]):
+    """
+    For a Dict of tensors, check if two or more tensors point to the same underlying memory and
+    keep only the first one
+    """
+    ptrs = defaultdict(list)
+    for k, v in tensors.items():
+        ptrs[v.data_ptr()].append(k)
+
+    # Iterate over all found memory addresses
+    for ptr, names in ptrs.items():
+        if len(names) > 1:
+            # Multiple tensors point to the same memory
+            # Only keep the first tensor
+            for name in names[1:]:
+                tensors.pop(name)
+
+
+def convert_file(pt_file: Path, st_file: Path):
+    """
+    Convert a pytorch file to a safetensors file
+    """
+    logger.info(f"Convert {pt_file} to {st_file}.")
+
+    pt_state = torch.load(pt_file, map_location="cpu")
+    if "state_dict" in pt_state:
+        pt_state = pt_state["state_dict"]
+
+    remove_shared_pointers(pt_state)
+
+    # Tensors need to be contiguous
+    pt_state = {k: v.contiguous() for k, v in pt_state.items()}
+
+    st_file.parent.mkdir(parents=True, exist_ok=True)
+    save_file(pt_state, str(st_file), metadata={"format": "pt"})
+
+    # Check that both files are close in size
+    check_file_size(pt_file, st_file)
+
+    # Load safetensors state
+    st_state = load_file(str(st_file))
+    for k in st_state:
+        pt_tensor = pt_state[k]
+        st_tensor = st_state[k]
+        if not torch.equal(pt_tensor, st_tensor):
+            raise RuntimeError(f"The output tensors do not match for key {k}")
+
+
+def convert_files(pt_files: List[Path], st_files: List[Path]):
+    assert len(pt_files) == len(st_files)
+
+    executor = ThreadPoolExecutor(max_workers=5)
+    futures = [
+        executor.submit(convert_file, pt_file=pt_file, st_file=st_file)
+        for pt_file, st_file in zip(pt_files, st_files)
+    ]
+
+    # We do this instead of using tqdm because we want to parse the logs with the launcher
+    start_time = time.time()
+    for i, future in enumerate(concurrent.futures.as_completed(futures)):
+        elapsed = timedelta(seconds=int(time.time() - start_time))
+        remaining = len(futures) - (i + 1)
+        eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0
+
+        logger.info(f"Convert: [{i + 1}/{len(futures)}] -- ETA: {eta}")
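# A self-contained demonstration (toy tensors) of why `remove_shared_pointers`
# must run before `save_file`: tied weights (e.g. input and output embeddings)
# share one storage, which safetensors refuses to serialize twice.
import torch
from collections import defaultdict

weight = torch.randn(4, 4)
state = {"embed.weight": weight, "lm_head.weight": weight}  # same data_ptr

ptrs = defaultdict(list)
for name, tensor in state.items():
    ptrs[tensor.data_ptr()].append(name)
for names in ptrs.values():
    for name in names[1:]:  # keep the first alias, drop the rest
        state.pop(name)

print(sorted(state))  # ['embed.weight']; transformers re-ties the weights on load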
diff --git a/server/text_generation_server/utils/dist.py b/server/text_generation_server/utils/dist.py new file mode 100644 index 00000000..9785493e --- /dev/null +++ b/server/text_generation_server/utils/dist.py @@ -0,0 +1,35 @@
+import os
+import torch
+
+from datetime import timedelta
+
+
+def initialize_torch_distributed():
+    rank = int(os.getenv("RANK", "0"))
+    world_size = int(os.getenv("WORLD_SIZE", "1"))
+
+    if torch.cuda.is_available():
+        from torch.distributed import ProcessGroupNCCL
+
+        # Set the device id.
+        assert world_size <= torch.cuda.device_count(), "Each process is one gpu"
+        device = rank % torch.cuda.device_count()
+        torch.cuda.set_device(device)
+        backend = "nccl"
+        options = ProcessGroupNCCL.Options()
+        options.is_high_priority_stream = True
+        options._timeout = timedelta(seconds=60)
+    else:
+        backend = "gloo"
+        options = None
+
+    # Call the init process.
+    torch.distributed.init_process_group(
+        backend=backend,
+        world_size=world_size,
+        rank=rank,
+        timeout=timedelta(seconds=60),
+        pg_options=options,
+    )
+
+    return torch.distributed.group.WORLD, rank, world_size diff --git a/server/text_generation_server/utils/hub.py b/server/text_generation_server/utils/hub.py new file mode 100644 index 00000000..d338fb29 --- /dev/null +++ b/server/text_generation_server/utils/hub.py @@ -0,0 +1,165 @@
+import time
+import os
+
+from datetime import timedelta
+from loguru import logger
+from pathlib import Path
+from typing import Optional, List
+
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+from huggingface_hub.utils import (
+    LocalEntryNotFoundError,
+    EntryNotFoundError,
+    RevisionNotFoundError,  # Import here to ease try/except in other parts of the lib
+)
+
+WEIGHTS_CACHE_OVERRIDE = os.getenv("WEIGHTS_CACHE_OVERRIDE", None)
+
+
+def weight_hub_files(
+    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
+) -> List[str]:
+    """Get the weights filenames on the hub"""
+    api = HfApi()
+    info = api.model_info(model_id, revision=revision)
+    filenames = [s.rfilename for s in info.siblings if s.rfilename.endswith(extension)]
+
+    if not filenames:
+        raise EntryNotFoundError(
+            f"No {extension} weights found for model {model_id} and revision {revision}.",
+            None,
+        )
+
+    return filenames
+
+
+def try_to_load_from_cache(
+    model_id: str, revision: Optional[str], filename: str
+) -> Optional[Path]:
+    """Try to load a file from the Hugging Face cache"""
+    if revision is None:
+        revision = "main"
+
+    object_id = model_id.replace("/", "--")
+    repo_cache = Path(HUGGINGFACE_HUB_CACHE) / f"models--{object_id}"
+
+    if not repo_cache.is_dir():
+        # No cache for this model
+        return None
+
+    refs_dir = repo_cache / "refs"
+    snapshots_dir = repo_cache / "snapshots"
+    no_exist_dir = repo_cache / ".no_exist"
+
+    # Resolve refs (for instance to convert main to the associated commit sha)
+    if refs_dir.is_dir():
+        revision_file = refs_dir / revision
+        if revision_file.exists():
+            with revision_file.open() as f:
+                revision = f.read()
+
+    # Check if file is cached as "no_exist"
+    if (no_exist_dir / revision / filename).is_file():
+        return None
+
+    # Check if revision folder exists
+    if not snapshots_dir.exists():
+        return None
+    cached_shas = os.listdir(snapshots_dir)
+    if revision not in cached_shas:
+        # No cache for this revision and we won't try to return a random revision
+        return None
+
+    # Check if file exists in cache
+    cached_file = snapshots_dir / revision / filename
+    return cached_file if cached_file.is_file() else None
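# For orientation (paths illustrative): the Hub cache layout that
# `try_to_load_from_cache` walks, entirely without network access:
#
#   <HUGGINGFACE_HUB_CACHE>/models--bigscience--bloom-560m/
#       refs/main                      <- text file holding the commit sha
#       snapshots/<sha>/model.safetensors
#       .no_exist/<sha>/<filename>     <- negative cache: file known to be absent
#
# A ref is resolved to a sha first, the negative cache short-circuits to None,
# and only then is snapshots/<sha>/<filename> checked.
cached = try_to_load_from_cache(
    "bigscience/bloom-560m", revision="main", filename="model.safetensors"
)
print(cached)  # Path inside snapshots/<sha>/ on a hit, None on a miss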
+
+
+def weight_files(
+    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
+) -> List[Path]:
+    """Get the local files"""
+    # Local model
+    if Path(model_id).exists() and Path(model_id).is_dir():
+        return list(Path(model_id).glob(f"*{extension}"))
+
+    try:
+        filenames = weight_hub_files(model_id, revision, extension)
+    except EntryNotFoundError as e:
+        if extension != ".safetensors":
+            raise e
+        # Try to see if there are pytorch weights
+        pt_filenames = weight_hub_files(model_id, revision, extension=".bin")
+        # Change pytorch extension to safetensors extension
+        # It is possible that we have safetensors weights locally even though they are not on the
+        # hub if we converted weights locally without pushing them
+        filenames = [
+            f"{Path(f).stem.lstrip('pytorch_')}.safetensors" for f in pt_filenames
+        ]
+
+    if WEIGHTS_CACHE_OVERRIDE is not None:
+        files = []
+        for filename in filenames:
+            p = Path(WEIGHTS_CACHE_OVERRIDE) / filename
+            if not p.exists():
+                raise LocalEntryNotFoundError(
+                    f"File {p} not found in {WEIGHTS_CACHE_OVERRIDE}."
+                )
+            files.append(p)
+        return files
+
+    files = []
+    for filename in filenames:
+        cache_file = try_to_load_from_cache(
+            model_id, revision=revision, filename=filename
+        )
+        if cache_file is None:
+            raise LocalEntryNotFoundError(
+                f"File {filename} of model {model_id} not found in "
+                f"{os.getenv('HUGGINGFACE_HUB_CACHE', 'the local cache')}. "
+                f"Please run `text-generation-server download-weights {model_id}` first."
+            )
+        files.append(cache_file)
+
+    return files
+
+
+def download_weights(
+    filenames: List[str], model_id: str, revision: Optional[str] = None
+) -> List[Path]:
+    """Download the safetensors files from the hub"""
+
+    def download_file(filename):
+        local_file = try_to_load_from_cache(model_id, revision, filename)
+        if local_file is not None:
+            logger.info(f"File {filename} already present in cache.")
+            return Path(local_file)
+
+        logger.info(f"Download file: {filename}")
+        start_time = time.time()
+        local_file = hf_hub_download(
+            filename=filename,
+            repo_id=model_id,
+            revision=revision,
+            local_files_only=False,
+        )
+        logger.info(
+            f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - start_time))}."
+ ) + return Path(local_file) + + # We do this instead of using tqdm because we want to parse the logs with the launcher + start_time = time.time() + files = [] + for i, filename in enumerate(filenames): + file = download_file(filename) + + elapsed = timedelta(seconds=int(time.time() - start_time)) + remaining = len(filenames) - (i + 1) + eta = (elapsed / (i + 1)) * remaining if remaining > 0 else 0 + + logger.info(f"Download: [{i + 1}/{len(filenames)}] -- ETA: {eta}") + files.append(file) + + return files diff --git a/server/text_generation_server/utils/tokens.py b/server/text_generation_server/utils/tokens.py new file mode 100644 index 00000000..c7594644 --- /dev/null +++ b/server/text_generation_server/utils/tokens.py @@ -0,0 +1,160 @@ +import re +import torch + +from transformers import ( + LogitsProcessorList, + TemperatureLogitsWarper, + TopKLogitsWarper, + TopPLogitsWarper, + TypicalLogitsWarper, + RepetitionPenaltyLogitsProcessor, + PreTrainedTokenizerBase, +) +from typing import List, Tuple, Optional + +from text_generation_server.pb import generate_pb2 +from text_generation_server.pb.generate_pb2 import FinishReason +from text_generation_server.utils.watermark import WatermarkLogitsProcessor + + +class Sampling: + def __init__(self, seed: int, device: str = "cpu"): + self.generator = torch.Generator(device) + self.generator.manual_seed(seed) + self.seed = seed + + def __call__(self, logits): + probs = torch.nn.functional.softmax(logits) + next_tokens = torch.multinomial(probs, num_samples=1, generator=self.generator) + return next_tokens + + +class Greedy: + def __call__(self, logits): + return logits.argmax() + + +class NextTokenChooser: + def __init__( + self, + watermark=False, + temperature=1.0, + repetition_penalty=1.0, + top_k=None, + top_p=None, + typical_p=None, + do_sample=False, + seed=0, + device="cpu", + ): + warpers = LogitsProcessorList() + # the following idea is largely copied from this PR: https://github.com/huggingface/transformers/pull/5420/files + # all samplers can be found in `generation_utils_samplers.py` + sampling = do_sample + + if watermark: + warpers.append(WatermarkLogitsProcessor(device=device)) + if repetition_penalty is not None and repetition_penalty != 1.0: + warpers.append(RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty)) + if temperature is not None and temperature != 1.0: + temperature = float(temperature) + warpers.append(TemperatureLogitsWarper(temperature)) + sampling = True + if top_k is not None and top_k != 0: + warpers.append(TopKLogitsWarper(top_k=top_k)) + sampling = True + if top_p is not None and top_p < 1.0: + warpers.append(TopPLogitsWarper(top_p=top_p)) + sampling = True + if typical_p is not None and typical_p < 1.0: + warpers.append(TypicalLogitsWarper(mass=typical_p)) + sampling = True + + self.warpers = warpers + self.choice = Sampling(seed, device) if sampling else Greedy() + + def __call__(self, input_ids, scores): + # Warp logits + if scores.shape[0] > 1: + # only warp the last token logits + scores[-1:, :] = self.warpers(input_ids, scores[-1:, :]) + else: + scores = self.warpers(input_ids, scores) + + # Compute logprobs + logprobs = torch.log_softmax(scores, -1) + + # Choose tokens + next_id = self.choice(scores[-1]) + + return next_id.view(1, 1), logprobs + + @classmethod + def from_pb( + cls, + pb: generate_pb2.NextTokenChooserParameters, + device: torch.device, + ) -> "NextTokenChooser": + return NextTokenChooser( + watermark=pb.watermark, + temperature=pb.temperature, + 
repetition_penalty=pb.repetition_penalty, + top_k=pb.top_k, + top_p=pb.top_p, + typical_p=pb.typical_p, + do_sample=pb.do_sample, + seed=pb.seed, + device=device, + ) + + +class StopSequenceCriteria: + def __init__(self, stop_sequence: str): + self.regex = re.compile(f".*{stop_sequence}$") + + def __call__(self, output: str) -> bool: + if self.regex.findall(output): + return True + return False + + +class StoppingCriteria: + def __init__( + self, + eos_token_id: int, + stop_sequence_criterias: List[StopSequenceCriteria], + max_new_tokens=20, + ): + self.eos_token_id = eos_token_id + self.stop_sequence_criterias = stop_sequence_criterias + self.max_new_tokens = max_new_tokens + self.current_tokens = 0 + self.current_output = "" + + def __call__(self, last_token: int, last_output: str) -> Tuple[bool, Optional[str]]: + self.current_tokens += 1 + if self.current_tokens >= self.max_new_tokens: + return True, FinishReason.FINISH_REASON_LENGTH + + if last_token == self.eos_token_id: + return True, FinishReason.FINISH_REASON_EOS_TOKEN + + self.current_output += last_output + for stop_sequence_criteria in self.stop_sequence_criterias: + if stop_sequence_criteria(self.current_output): + return True, FinishReason.FINISH_REASON_STOP_SEQUENCE + + return False, None + + @classmethod + def from_pb( + cls, + pb: generate_pb2.StoppingCriteriaParameters, + tokenizer: PreTrainedTokenizerBase, + ) -> "StoppingCriteria": + stop_sequence_criterias = [ + StopSequenceCriteria(sequence) for sequence in pb.stop_sequences + ] + return StoppingCriteria( + tokenizer.eos_token_id, stop_sequence_criterias, pb.max_new_tokens + ) diff --git a/server/text_generation_server/utils/watermark.py b/server/text_generation_server/utils/watermark.py new file mode 100644 index 00000000..8e90a59c --- /dev/null +++ b/server/text_generation_server/utils/watermark.py @@ -0,0 +1,87 @@ +# coding=utf-8 +# Copyright 2023 Authors of "A Watermark for Large Language Models" +# available at https://arxiv.org/abs/2301.10226 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
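# Usage sketch (inputs invented): StoppingCriteria is stateful -- it counts
# generated tokens and accumulates decoded text -- so `from_pb` builds a fresh
# instance per request rather than sharing one across the batch.
criteria = StoppingCriteria(
    eos_token_id=0,
    stop_sequence_criterias=[StopSequenceCriteria("\n\nUser:")],
    max_new_tokens=3,
)
assert criteria(last_token=42, last_output="Hello") == (False, None)
assert criteria(last_token=0, last_output="!") == (
    True,
    FinishReason.FINISH_REASON_EOS_TOKEN,
)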
+import os
+
+import torch
+from transformers import LogitsProcessor
+
+GAMMA = float(os.getenv("WATERMARK_GAMMA", 0.5))
+DELTA = float(os.getenv("WATERMARK_DELTA", 2.0))
+
+
+class WatermarkLogitsProcessor(LogitsProcessor):
+    def __init__(
+        self,
+        gamma: float = GAMMA,
+        delta: float = DELTA,
+        hash_key: int = 15485863,  # just a large prime number to create a rng seed with sufficient bit width
+        device: str = "cpu",
+    ):
+        # watermarking parameters
+        self.gamma = gamma
+        self.delta = delta
+        self.rng = torch.Generator(device=device)
+        self.hash_key = hash_key
+
+    def _seed_rng(self, input_ids: torch.LongTensor) -> None:
+        assert (
+            input_ids.shape[-1] >= 1
+        ), "requires at least a 1 token prefix sequence to seed rng"
+        prev_token = input_ids[-1].item()
+        self.rng.manual_seed(self.hash_key * prev_token)
+
+    def _get_greenlist_ids(
+        self, input_ids: torch.LongTensor, max_value: int
+    ) -> list[int]:
+        # seed the rng using the previous tokens/prefix
+        self._seed_rng(input_ids)
+
+        greenlist_size = int(max_value * self.gamma)
+        vocab_permutation = torch.randperm(
+            max_value, device=input_ids.device, generator=self.rng
+        )
+        greenlist_ids = vocab_permutation[:greenlist_size]
+        return greenlist_ids
+
+    @staticmethod
+    def _calc_greenlist_mask(
+        scores: torch.FloatTensor, greenlist_token_ids
+    ) -> torch.BoolTensor:
+        green_tokens_mask = torch.zeros_like(scores)
+        green_tokens_mask[-1, greenlist_token_ids] = 1
+        final_mask = green_tokens_mask.bool()
+        return final_mask
+
+    @staticmethod
+    def _bias_greenlist_logits(
+        scores: torch.Tensor, greenlist_mask: torch.Tensor, greenlist_bias: float
+    ) -> torch.Tensor:
+        scores[greenlist_mask] = scores[greenlist_mask] + greenlist_bias
+        return scores
+
+    def __call__(
+        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
+    ) -> torch.FloatTensor:
+        assert len(input_ids) == 1
+        greenlist_ids = self._get_greenlist_ids(input_ids[0], scores.shape[-1])
+        green_tokens_mask = self._calc_greenlist_mask(
+            scores=scores, greenlist_token_ids=greenlist_ids
+        )
+
+        scores = self._bias_greenlist_logits(
+            scores=scores, greenlist_mask=green_tokens_mask, greenlist_bias=self.delta
+        )
+        return scores diff --git a/supported_models.json b/supported_models.json new file mode 100644 index 00000000..86d3bdfe --- /dev/null +++ b/supported_models.json @@ -0,0 +1,9 @@
+[
+    "bigscience/bloom",
+    "bigscience/bloomz",
+    "EleutherAI/gpt-neox-20b",
+    "google/flan-ul2",
+    "google/flan-t5-xxl",
+    "OpenAssistant/oasst-sft-1-pythia-12b",
+    "olivierdehaene/optimized-santacoder"
+]
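# End-to-end sketch of the watermark bias above, with toy sizes: the previous
# token seeds a RNG, the RNG permutes the vocabulary, the first gamma-fraction
# becomes the greenlist, and those logits are raised by delta so sampling (or a
# greedy argmax, as here) statistically favours detectable tokens.
import torch

vocab, gamma, delta, hash_key = 8, 0.5, 2.0, 15485863
prev_token = 5

rng = torch.Generator(device="cpu")
rng.manual_seed(hash_key * prev_token)
greenlist = torch.randperm(vocab, generator=rng)[: int(vocab * gamma)]

scores = torch.zeros(1, vocab)
scores[-1, greenlist] += delta
assert scores.argmax().item() in greenlist.tolist()  # flat logits -> greenlisted pick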