From bcfcd4740a7e0a95aec79e54d4bff60c335e9420 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 27 Jun 2024 08:08:43 +0200 Subject: [PATCH 01/12] Fixing prom leak by upgrading. (#2129) --- Cargo.lock | 592 +++++++++++++++++++++++++++++++++------------- router/Cargo.toml | 6 +- 2 files changed, 429 insertions(+), 169 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b9bd7363..584b4134 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "anstyle-query" -version = "1.0.3" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" dependencies = [ "windows-sys 0.52.0", ] @@ -121,7 +121,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -160,7 +160,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -171,9 +171,15 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.3.0" @@ -226,6 +232,33 @@ dependencies = [ "slotmap", ] +[[package]] +name = "aws-lc-rs" +version = "1.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7d844e282b4b56750b2d4e893b2205581ded8709fddd2b6aa5418c150ca877" +dependencies = [ + "aws-lc-sys", + "mirai-annotations", + "paste", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.18.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3a2c29203f6bf296d01141cc8bb9dbd5ecd4c27843f2ee0767bcd5985a927da" +dependencies = [ + "bindgen", + "cc", + "cmake", + "dunce", + "fs_extra", + "libc", + "paste", +] + [[package]] name = "axum" version = "0.6.20" @@ -239,7 +272,7 @@ dependencies = [ "futures-util", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.28", + "hyper 0.14.29", "itoa", "matchit", "memchr", @@ -350,9 +383,9 @@ dependencies = [ [[package]] name = "backtrace" -version = "0.3.72" +version = "0.3.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" dependencies = [ "addr2line", "cc", @@ -381,6 +414,29 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bindgen" +version = "0.69.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +dependencies = [ + "bitflags 2.6.0", + "cexpr", + "clang-sys", + "itertools 0.12.1", + "lazy_static", + "lazycell", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.68", + "which", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -410,15 +466,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "bitstream-io" -version = "2.3.0" +version = "2.4.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c12d1856e42f0d817a835fe55853957c85c8c8a470114029143d3f12671446e" +checksum = "415f8399438eb5e4b2f73ed3152a3448b98149dda642a957ee704e1daa5cf1d8" [[package]] name = "block-buffer" @@ -449,9 +505,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.16.0" +version = "1.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" +checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" [[package]] name = "byteorder" @@ -511,15 +567,24 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" [[package]] name = "cc" -version = "1.0.98" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" +checksum = "ac367972e516d45567c7eafc73d24e1c193dcf200a8d94e9db7b3d38b349572d" dependencies = [ "jobserver", "libc", "once_cell", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-expr" version = "0.15.8" @@ -543,10 +608,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" [[package]] -name = "clap" -version = "4.5.4" +name = "clang-sys" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" +checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "clap" +version = "4.5.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5db83dced34638ad474f39f250d7fea9598bdd239eaced1bdf45d597da0f433f" dependencies = [ "clap_builder", "clap_derive", @@ -554,9 +630,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.2" +version = "4.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" +checksum = "f7e204572485eb3fbf28f871612191521df159bc3e15a9f5064c66dba3a8c05f" dependencies = [ "anstream", "anstyle", @@ -566,21 +642,30 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.4" +version = "4.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" +checksum = "c780290ccf4fb26629baa7a1081e68ced113f1d3ec302fa5948f1c381ebf06c6" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "clap_lex" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + +[[package]] +name = "cmake" +version = "0.1.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130" +dependencies = [ + "cc", +] [[package]] name = "color_quant" @@ -681,7 +766,7 @@ version = "0.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "crossterm_winapi", "libc", "mio", @@ -747,7 +832,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -758,7 +843,7 @@ checksum = 
"733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178" dependencies = [ "darling_core", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -788,7 +873,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -798,7 +883,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -832,6 +917,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "dunce" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" + [[package]] name = "easy-cast" version = "0.5.2" @@ -843,9 +934,9 @@ dependencies = [ [[package]] name = "either" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" [[package]] name = "encode_unicode" @@ -1005,6 +1096,12 @@ dependencies = [ "num", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.30" @@ -1061,7 +1158,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1177,6 +1274,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + 
"futures-sink", + "http 1.1.0", + "indexmap 2.2.6", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.4.1" @@ -1193,20 +1309,14 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -[[package]] -name = "hashbrown" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ff8ae62cd3a9102e5637afc8452c55acf3844001bd5374e0b0bd7b6616c038" -dependencies = [ - "ahash", -] - [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] [[package]] name = "heck" @@ -1247,6 +1357,15 @@ dependencies = [ "ureq", ] +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + [[package]] name = "hostname" version = "0.3.1" @@ -1303,12 +1422,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", - "futures-core", + "futures-util", "http 1.1.0", "http-body 1.0.0", "pin-project-lite", @@ -1316,9 +1435,9 @@ dependencies = [ [[package]] name = "httparse" -version = "1.8.0" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" +checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" [[package]] name = "httpdate" @@ -1328,15 +1447,15 @@ 
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.28" +version = "0.14.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80" +checksum = "f361cde2f109281a220d4307746cdfd5ee3f410da58a70377762396775634b33" dependencies = [ "bytes", "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "httparse", @@ -1359,6 +1478,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2 0.4.5", "http 1.1.0", "http-body 1.0.0", "httparse", @@ -1367,6 +1487,26 @@ dependencies = [ "pin-project-lite", "smallvec", "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +dependencies = [ + "futures-util", + "http 1.1.0", + "hyper 1.3.1", + "hyper-util", + "log", + "rustls 0.23.10", + "rustls-native-certs", + "rustls-pki-types", + "tokio", + "tokio-rustls", + "tower-service", ] [[package]] @@ -1375,7 +1515,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper 0.14.28", + "hyper 0.14.29", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -1388,7 +1528,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.28", + "hyper 0.14.29", "native-tls", "tokio", "tokio-native-tls", @@ -1401,12 +1541,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" dependencies = [ "bytes", + "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.0", 
"hyper 1.3.1", "pin-project-lite", + "socket2", "tokio", + "tower", + "tower-service", + "tracing", ] [[package]] @@ -1534,7 +1679,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1647,9 +1792,15 @@ dependencies = [ [[package]] name = "lazy_static" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "lebe" @@ -1674,6 +1825,16 @@ dependencies = [ "once_cell", ] +[[package]] +name = "libloading" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" +dependencies = [ + "cfg-if", + "windows-targets 0.52.5", +] + [[package]] name = "libm" version = "0.2.8" @@ -1686,7 +1847,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "libc", ] @@ -1721,15 +1882,6 @@ dependencies = [ "imgref", ] -[[package]] -name = "mach2" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" -dependencies = [ - "libc", -] - [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -1779,9 +1931,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.2" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "metrics" @@ -1795,16 +1947,29 @@ dependencies = [ ] [[package]] -name = "metrics-exporter-prometheus" -version = "0.12.2" +name = "metrics" +version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d4fa7ce7c4862db464a37b0b31d89bca874562f034bd7993895572783d02950" +checksum = "884adb57038347dfbaf2d5065887b6cf4312330dc8e94bc30a1a839bd79d3261" dependencies = [ - "base64 0.21.7", - "hyper 0.14.28", - "indexmap 1.9.3", + "ahash", + "portable-atomic", +] + +[[package]] +name = "metrics-exporter-prometheus" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf0af7a0d7ced10c0151f870e5e3f3f8bc9ffc5992d32873566ca1f9169ae776" +dependencies = [ + "base64 0.22.1", + "http-body-util", + "hyper 1.3.1", + "hyper-rustls", + "hyper-util", + "indexmap 2.2.6", "ipnet", - "metrics", + "metrics 0.23.0", "metrics-util", "quanta", "thiserror", @@ -1820,19 +1985,19 @@ checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "metrics-util" -version = "0.15.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4de2ed6e491ed114b40b732e4d1659a9d53992ebd87490c44a6ffe23739d973e" +checksum = "4259040465c955f9f2f1a4a8a16dc46726169bca0f88e8fb2dbeced487c3e828" dependencies = [ "crossbeam-epoch", "crossbeam-utils", - "hashbrown 0.13.1", - "metrics", + "hashbrown 0.14.5", + "metrics 0.23.0", "num_cpus", "quanta", "sketches-ddsketch", @@ -1881,9 +2046,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"87dfd01fe195c66b572b37921ad8803d010623c0aca821bea2302239d155cdae" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" dependencies = [ "adler", "simd-adler32", @@ -1901,6 +2066,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "mirai-annotations" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" + [[package]] name = "monostate" version = "0.1.13" @@ -1919,7 +2090,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -1985,12 +2156,12 @@ dependencies = [ "bytes", "futures", "hostname", - "hyper 0.14.28", + "hyper 0.14.29", "muxado", "once_cell", "parking_lot", "regex", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "thiserror", @@ -2007,7 +2178,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cfg-if", "cfg_aliases", "libc", @@ -2107,7 +2278,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2178,9 +2349,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" -version = "0.35.0" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +checksum = "576dfe1fc8f9df304abb159d767a29d0476f7750fbf8aa7ad07816004a207434" dependencies = [ "memchr", ] @@ -2219,7 +2390,7 @@ version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cfg-if", "foreign-types", "libc", @@ -2236,7 +2407,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2485,7 +2656,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2544,7 +2715,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f12335488a2f3b0a83b14edad48dca9879ce89b2edd10e80237e4e852dd645e" dependencies = [ "proc-macro2", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2573,9 +2744,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.85" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22244ce15aa966053a896d1accb3a6e68469b97c7f33f284b99f0d576879fc23" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" dependencies = [ "unicode-ident", ] @@ -2596,7 +2767,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd" dependencies = [ "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2636,7 +2807,7 @@ dependencies = [ "prost 0.12.6", "prost-types", "regex", - "syn 2.0.66", + "syn 2.0.68", "tempfile", ] @@ -2663,7 +2834,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -2686,13 +2857,12 @@ dependencies = [ [[package]] name = "quanta" -version = "0.11.1" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17e662a7a8291a865152364c20c7abc5e60486ab2001e8ec10b24862de0b9ab" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" dependencies = [ 
"crossbeam-utils", "libc", - "mach2", "once_cell", "raw-cpuid", "wasi", @@ -2751,7 +2921,7 @@ version = "0.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "cassowary", "crossterm", "indoc", @@ -2799,9 +2969,9 @@ dependencies = [ [[package]] name = "ravif" -version = "0.11.5" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234" +checksum = "67376f469e7e7840d0040bbf4b9b3334005bb167f814621326e4c7ab8cd6e944" dependencies = [ "avif-serialize", "imgref", @@ -2814,11 +2984,11 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "10.7.0" +version = "11.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", ] [[package]] @@ -2854,11 +3024,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" +checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", ] [[package]] @@ -2874,14 +3044,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.4" +version = "1.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.6", - "regex-syntax 0.8.3", + "regex-automata 0.4.7", + 
"regex-syntax 0.8.4", ] [[package]] @@ -2895,13 +3065,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.6" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", ] [[package]] @@ -2912,9 +3082,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.3" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" @@ -2927,10 +3097,10 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.28", + "hyper 0.14.29", "hyper-tls", "ipnet", "js-sys", @@ -2940,7 +3110,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -3015,7 +3185,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.66", + "syn 2.0.68", "walkdir", ] @@ -3035,6 +3205,12 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.4.0" @@ -3050,7 +3226,7 @@ version = "0.38.34" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys", @@ -3083,6 +3259,34 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls" +version = "0.23.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05cff451f60db80f490f3c182b77c35260baace73209e9cdbbe526bfe3a4d402" +dependencies = [ + "aws-lc-rs", + "log", + "once_cell", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" +dependencies = [ + "openssl-probe", + "rustls-pemfile 2.1.2", + "rustls-pki-types", + "schannel", + "security-framework", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -3092,6 +3296,16 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.1", + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.7.0" @@ -3104,6 +3318,7 @@ version = "0.102.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff448f7e92e913c4b7d4c6d8e4540a1724b319b4152b8aef6d4cf8339712b33e" dependencies = [ + "aws-lc-rs", "ring 0.17.8", "rustls-pki-types", "untrusted 0.9.0", @@ -3161,7 +3376,7 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c627723fd09706bacdb5cf41499e95098555af3c3c29d014dc3c458ef6be11c0" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "core-foundation", "core-foundation-sys", "libc", @@ -3204,14 +3419,14 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" dependencies = [ "itoa", "ryu", @@ -3269,6 +3484,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook" version = "0.3.17" @@ -3406,14 +3627,14 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] name = "subtle" -version = "2.5.0" +version = "2.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" @@ -3428,9 +3649,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.66" +version = "2.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" dependencies = [ "proc-macro2", "quote", @@ -3612,7 +3833,7 @@ dependencies = [ "image", "init-tracing-opentelemetry", "jsonschema", - "metrics", + "metrics 0.21.1", "metrics-exporter-prometheus", "minijinja", "minijinja-contrib", @@ -3633,6 +3854,8 @@ dependencies = [ "tokio-stream", "tower-http", "tracing", + "tracing-core", + "tracing-log 0.2.0", "tracing-opentelemetry 0.21.0", "tracing-subscriber", "utoipa", @@ -3657,7 +3880,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" dependencies = [ "proc-macro2", 
"quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -3716,9 +3939,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.6.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +checksum = "c55115c6fbe2d2bef26eb09ad74bde02d8255476fc0c7b515ef09fbb35742d82" dependencies = [ "tinyvec_macros", ] @@ -3752,7 +3975,7 @@ dependencies = [ "rayon", "rayon-cond", "regex", - "regex-syntax 0.8.3", + "regex-syntax 0.8.4", "serde", "serde_json", "spm_precompiled", @@ -3799,7 +4022,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -3823,6 +4046,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +dependencies = [ + "rustls 0.23.10", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.15" @@ -3850,9 +4084,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.13" +version = "0.8.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4e43f8cc456c9704c851ae29c67e17ef65d2c30017c17a9765b89c382dc8bba" +checksum = "6f49eb2ab21d2f26bd6db7bf383edc527a7ebaee412d17af4d40fdccd442f335" dependencies = [ "serde", "serde_spanned", @@ -3871,9 +4105,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.13" +version = "0.22.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c127785850e8c20836d49732ae6abfa47616e60bf9d9f57c43c250361a9db96c" +checksum = "f21c7aaf97f1bd9ca9d4f9e73b0a6c74bd5afef56f2bc931943a6e1c37e04e38" dependencies = [ "indexmap 2.2.6", "serde", @@ -3894,10 +4128,10 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2", + "h2 
0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.28", + "hyper 0.14.29", "hyper-timeout", "percent-encoding", "pin-project", @@ -3921,10 +4155,10 @@ dependencies = [ "axum 0.6.20", "base64 0.21.7", "bytes", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.28", + "hyper 0.14.29", "hyper-timeout", "percent-encoding", "pin-project", @@ -3947,7 +4181,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -3976,7 +4210,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.5.0", + "bitflags 2.6.0", "bytes", "http 1.1.0", "http-body 1.0.0", @@ -4018,7 +4252,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -4189,9 +4423,9 @@ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202" [[package]] name = "unicode-width" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" +checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] name = "unicode_categories" @@ -4233,9 +4467,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.0" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" dependencies = [ "form_urlencoded", "idna", @@ -4250,9 +4484,9 @@ checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "utf8parse" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utoipa" @@ -4276,7 +4510,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -4297,9 +4531,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.8.0" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" +checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" [[package]] name = "v_frame" @@ -4398,7 +4632,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", "wasm-bindgen-shared", ] @@ -4432,7 +4666,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4475,9 +4709,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.1" +version = "0.26.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +checksum = "bd7c23921eeb1713a4e851530e9b9756e4fb0e89978582942612524cf09f01cd" dependencies = [ "rustls-pki-types", ] @@ -4488,6 +4722,18 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + [[package]] name = "winapi" version = "0.3.9" @@ -4745,9 +4991,9 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] 
name = "winnow" -version = "0.6.9" +version = "0.6.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86c949fede1d13936a99f14fafd3e76fd642b556dd2ce96287fbe2e0151bfac6" +checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1" dependencies = [ "memchr", ] @@ -4779,7 +5025,7 @@ checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn 2.0.68", ] [[package]] @@ -4787,6 +5033,20 @@ name = "zeroize" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +dependencies = [ + "zeroize_derive", +] + +[[package]] +name = "zeroize_derive" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.68", +] [[package]] name = "zip" diff --git a/router/Cargo.toml b/router/Cargo.toml index 5bf4c00c..853f46b1 100644 --- a/router/Cargo.toml +++ b/router/Cargo.toml @@ -24,7 +24,7 @@ futures = "0.3.28" hf-hub = { workspace = true } jsonschema = { version = "0.17.1", features = ["draft202012"] } metrics = "0.21.1" -metrics-exporter-prometheus = { version = "0.12.1", features = [] } +metrics-exporter-prometheus = { version = "0.15.1", features = [] } nohash-hasher = "0.2.0" opentelemetry = { version = "0.20.0", features = ["rt-tokio"] } opentelemetry-otlp = "0.13.0" @@ -37,9 +37,9 @@ tokenizers = { workspace = true} tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] } tokio-stream = "0.1.14" tower-http = { version = "0.5.1", features = ["cors"] } -tracing = "0.1.37" +tracing = "0.1.40" tracing-opentelemetry = "0.21.0" -tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } +tracing-subscriber = { version = "0.3.18", 
features = ["json", "env-filter"] } utoipa = { version = "4.2.0", features = ["axum_extras"] } utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] } ngrok = { version = "0.13.1", features = ["axum"], optional = true } From b53b21c63a3fb0e57c23cb7e7b15d3c4588c66c0 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 27 Jun 2024 12:34:43 +0200 Subject: [PATCH 02/12] Bumping to 2.1 (#2131) --- Cargo.toml | 2 +- README.md | 8 +++++--- docs/source/installation_amd.md | 2 +- docs/source/installation_nvidia.md | 2 +- docs/source/quicktour.md | 4 ++-- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bc2da5a1..74737aab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ members = [ resolver = "2" [workspace.package] -version = "2.0.5-dev0" +version = "2.1.1-dev0" edition = "2021" authors = ["Olivier Dehaene"] homepage = "https://github.com/huggingface/text-generation-inference" diff --git a/README.md b/README.md index d60c7cde..cda6d2d7 100644 --- a/README.md +++ b/README.md @@ -75,9 +75,11 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c ```shell model=HuggingFaceH4/zephyr-7b-beta -volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run +# share a volume with the Docker container to avoid downloading weights every run +volume=$PWD/data -docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.0 --model-id $model +docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \ + ghcr.io/huggingface/text-generation-inference:2.1.0 --model-id $model ``` And then you can make requests like @@ -91,7 +93,7 @@ curl 127.0.0.1:8080/generate_stream \ **Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher. 
For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar. -**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.0-rocm --model-id $model` instead of the command above. +**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.1.0-rocm --model-id $model` instead of the command above. 
To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli): ``` diff --git a/docs/source/installation_amd.md b/docs/source/installation_amd.md index bf7f9c75..fe925e2a 100644 --- a/docs/source/installation_amd.md +++ b/docs/source/installation_amd.md @@ -11,7 +11,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading docker run --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \ --device=/dev/kfd --device=/dev/dri --group-add video \ --ipc=host --shm-size 256g --net host -v $volume:/data \ - ghcr.io/huggingface/text-generation-inference:2.0.4-rocm \ + ghcr.io/huggingface/text-generation-inference:2.1.0-rocm \ --model-id $model ``` diff --git a/docs/source/installation_nvidia.md b/docs/source/installation_nvidia.md index 9077f7fd..11c41763 100644 --- a/docs/source/installation_nvidia.md +++ b/docs/source/installation_nvidia.md @@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run docker run --gpus all --shm-size 64g -p 8080:80 -v $volume:/data \ - ghcr.io/huggingface/text-generation-inference:2.0.4 \ + ghcr.io/huggingface/text-generation-inference:2.1.0 \ --model-id $model ``` diff --git a/docs/source/quicktour.md b/docs/source/quicktour.md index b84de85d..09e56df4 100644 --- a/docs/source/quicktour.md +++ b/docs/source/quicktour.md @@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \ - ghcr.io/huggingface/text-generation-inference:2.0.4 \ + ghcr.io/huggingface/text-generation-inference:2.1.0 \ --model-id $model ``` @@ -88,7 +88,7 @@ curl 127.0.0.1:8080/generate \ To see all possible deploy flags and options, you can use the `--help` flag. 
It's possible to configure the number of shards, quantization, generation parameters, and more. ```bash -docker run ghcr.io/huggingface/text-generation-inference:2.0.4 --help +docker run ghcr.io/huggingface/text-generation-inference:2.1.0 --help ``` From dd2d91b043ae935f3097ce65c44403d5aa9ecc46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Thu, 27 Jun 2024 15:54:35 +0200 Subject: [PATCH 03/12] Idefics2: sync added image tokens with transformers (#2080) Before this change, the number of reserved image tokens was not the same as the number of images. Fixes #2029. While at it, also remove all the image token handling duplication in `prepare_input`. --- Cargo.lock | 1 + .../test_flash_idefics2_next_load.json | 11124 ++++++++-------- .../test_flash_idefics2_next_simple.json | 20 +- .../test_flash_idefics2_two_images.json | 38 +- router/Cargo.toml | 1 + router/src/config.rs | 12 +- router/src/lib.rs | 19 + router/src/main.rs | 12 +- router/src/server.rs | 8 +- router/src/validation.rs | 242 +- .../models/custom_modeling/llava_next.py | 9 +- .../models/pali_gemma.py | 4 +- .../models/vlm_causal_lm.py | 57 +- 13 files changed, 5887 insertions(+), 5660 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 584b4134..a03da8b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3832,6 +3832,7 @@ dependencies = [ "hf-hub", "image", "init-tracing-opentelemetry", + "itertools 0.10.5", "jsonschema", "metrics 0.21.1", "metrics-exporter-prometheus", diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json index 4bc90896..7f1875e0 100644 --- a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json +++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_load.json @@ -37,7 +37,7 @@ }, { "id": 32001, - "logprob": -19.484375, + "logprob": -19.46875, "text": "" }, { @@ -57,7 
+57,7 @@ }, { "id": 32001, - "logprob": -20.234375, + "logprob": -20.21875, "text": "" }, { @@ -65,11 +65,1785 @@ "logprob": -16.421875, "text": "" }, + { + "id": 32001, + "logprob": -19.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -23.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.28125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -22.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -23.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -23.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.78125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.1875, + "text": "" + }, + { + "id": 32001, 
+ "logprob": -18.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -14.8828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -23.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -23.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -23.0, + "text": "" + }, + { + "id": 32001, + "logprob": -19.75, + "text": "" + }, + { + "id": 32001, + "logprob": -17.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.9921875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.15625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.15625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -22.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, 
+ { + "id": 32001, + "logprob": -19.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.78125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.2734375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.859375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.140625, + 
"text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.546875, + "text": "" + }, + { + "id": 32001, + "logprob": -14.1953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.0, + "text": "" + }, + { + "id": 32001, + "logprob": -20.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -14.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.2988281, + "text": "" + }, + { + "id": 32001, + "logprob": -25.75, + "text": "" + }, + { + "id": 32001, + "logprob": -18.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + 
"logprob": -17.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.7421875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.25, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.9453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.4453125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + 
{ + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32000, + "logprob": -2.7207031, + "text": "" + }, + { + "id": 32001, + "logprob": -23.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.5, + 
"text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.0625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.25, + "text": "" + }, { "id": 32001, "logprob": -19.828125, "text": "" }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.6640625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + 
"text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32000, + "logprob": -3.0917969, + "text": "" + }, + { + "id": 32001, + "logprob": -25.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.75, + "text": "" + }, + { + "id": 32001, + "logprob": -18.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": 
-16.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.1328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.0, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.75, + "text": "" + }, + { + "id": 32001, + "logprob": -16.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -22.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.3984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.75, + "text": "" + }, + { + "id": 32001, + "logprob": -14.6484375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.84375, + "text": "" + }, + { + "id": 32001, 
+ "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.5, + "text": "" + }, + { + "id": 32001, + "logprob": -20.0, + "text": "" + }, + { + "id": 32001, + "logprob": -18.78125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -13.6171875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.9609375, + "text": "" + }, + { + "id": 32000, + "logprob": -2.0332031, + "text": "" + }, + { + "id": 12018, + "logprob": -12.078125, + "text": "Write" + }, + { + "id": 528, + "logprob": -10.09375, + "text": "me" + }, + { + "id": 264, + "logprob": -0.103393555, + "text": "a" + }, + { + "id": 2485, + "logprob": -4.5742188, + "text": "short" + }, + { + "id": 2838, + "logprob": -0.23815918, + "text": "story" + }, + { + "id": 32002, + "logprob": -10.9765625, + "text": "" + }, + { + "id": 259, + "logprob": -20.34375, + "text": " " + }, + { + "id": 13, + "logprob": -8.53125, + "text": 
"\n" + }, + { + "id": 7226, + "logprob": -10.4765625, + "text": "Ass" + }, + { + "id": 11143, + "logprob": -13.6015625, + "text": "istant" + }, + { + "id": 28747, + "logprob": -0.008514404, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 330, + "logprob": -0.09289551, + "special": false, + "text": " A" + }, + { + "id": 13088, + "logprob": -0.6743164, + "special": false, + "text": " chicken" + }, + { + "id": 349, + "logprob": -0.31396484, + "special": false, + "text": " is" + }, + { + "id": 6398, + "logprob": -0.051727295, + "special": false, + "text": " sitting" + }, + { + "id": 356, + "logprob": -0.34448242, + "special": false, + "text": " on" + }, + { + "id": 264, + "logprob": -0.1194458, + "special": false, + "text": " a" + }, + { + "id": 17972, + "logprob": -0.03237915, + "special": false, + "text": " pile" + }, + { + "id": 302, + "logprob": -0.00018751621, + "special": false, + "text": " of" + }, + { + "id": 2445, + "logprob": -0.07043457, + "special": false, + "text": " money" + }, + { + "id": 28723, + "logprob": -0.00422287, + "special": false, + "text": "." + } + ], + "top_tokens": null + }, + "generated_text": " A chicken is sitting on a pile of money." 
+ }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1247, + "logprob": -5.2382812, + "text": "User" + }, + { + "id": 28747, + "logprob": -6.9492188, + "text": ":" + }, + { + "id": 32000, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.84375, + "text": "" + }, { "id": 32001, "logprob": -23.25, @@ -102,7 +1876,7 @@ }, { "id": 32001, - "logprob": -21.015625, + "logprob": -21.03125, "text": "" }, { @@ -112,12 +1886,12 @@ }, { "id": 32001, - "logprob": -16.015625, + "logprob": -16.03125, "text": "" }, { "id": 32001, - "logprob": -19.0625, + "logprob": -19.046875, "text": "" }, { @@ -127,12 +1901,7 @@ }, { "id": 32001, - "logprob": -23.625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.40625, + "logprob": -23.609375, "text": "" }, { @@ -142,7 +1911,12 @@ }, { "id": 32001, - "logprob": -20.84375, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, "text": "" }, { @@ -152,12 +1926,12 @@ }, { "id": 32001, - "logprob": -19.984375, + "logprob": -19.96875, "text": "" }, { "id": 32001, - "logprob": -18.21875, + "logprob": -18.234375, "text": "" }, { @@ -167,7 +1941,7 @@ }, { "id": 32001, - "logprob": -21.203125, + "logprob": -21.1875, "text": "" }, { @@ -182,7 +1956,7 @@ }, { "id": 32001, - "logprob": -18.984375, + "logprob": -19.03125, "text": "" }, { @@ -197,17 +1971,17 @@ }, { "id": 32001, 
- "logprob": -18.0, + "logprob": -17.96875, "text": "" }, { "id": 32001, - "logprob": -18.828125, + "logprob": -18.859375, "text": "" }, { "id": 32001, - "logprob": -17.9375, + "logprob": -17.921875, "text": "" }, { @@ -217,12 +1991,12 @@ }, { "id": 32001, - "logprob": -18.640625, + "logprob": -18.65625, "text": "" }, { "id": 32001, - "logprob": -20.125, + "logprob": -20.140625, "text": "" }, { @@ -247,7 +2021,7 @@ }, { "id": 32001, - "logprob": -17.4375, + "logprob": -17.421875, "text": "" }, { @@ -257,7 +2031,7 @@ }, { "id": 32001, - "logprob": -23.015625, + "logprob": -23.0, "text": "" }, { @@ -282,7 +2056,7 @@ }, { "id": 32001, - "logprob": -18.40625, + "logprob": -18.421875, "text": "" }, { @@ -292,17 +2066,17 @@ }, { "id": 32001, - "logprob": -18.34375, + "logprob": -18.328125, "text": "" }, { "id": 32001, - "logprob": -17.140625, + "logprob": -17.125, "text": "" }, { "id": 32001, - "logprob": -18.671875, + "logprob": -18.65625, "text": "" }, { @@ -317,7 +2091,7 @@ }, { "id": 32001, - "logprob": -18.1875, + "logprob": -18.15625, "text": "" }, { @@ -337,12 +2111,7 @@ }, { "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.9375, + "logprob": -18.703125, "text": "" }, { @@ -351,883 +2120,38 @@ "text": "" }, { - "id": 32001, - "logprob": -19.125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.25, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.375, - "text": "" - }, - 
{ - "id": 32001, - "logprob": -20.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4140625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.7265625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -14.2421875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.59375, - "text": "" - }, - { - "id": 32001, - "logprob": 
-20.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8515625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 
32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -20.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.5, - "text": "" - }, - { - "id": 32001, - "logprob": -20.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - 
{ - "id": 32001, - "logprob": -19.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.6875, - "text": "" - }, - { - "id": 32001, - "logprob": -22.625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, - "text": "" - }, - { - "id": 32001, - "logprob": -21.8125, - 
"text": "" - }, - { - "id": 32001, - "logprob": -19.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.375, - "text": "" + "id": 32000, + "logprob": -3.015625, + "text": "" }, { "id": 32001, - "logprob": -20.140625, + "logprob": -22.109375, "text": "" }, { "id": 32001, - "logprob": -21.140625, + "logprob": -18.96875, "text": "" }, { "id": 32001, - "logprob": -21.6875, + "logprob": -20.125, "text": "" }, { "id": 32001, - "logprob": -21.453125, + 
"logprob": -17.125, "text": "" }, { "id": 32001, - "logprob": -19.171875, + "logprob": -17.8125, "text": "" }, { "id": 32001, - "logprob": -17.78125, + "logprob": -19.3125, "text": "" }, { @@ -1237,157 +2161,27 @@ }, { "id": 32001, - "logprob": -17.078125, + "logprob": -16.3125, "text": "" }, { "id": 32001, - "logprob": -17.109375, + "logprob": -19.375, "text": "" }, { "id": 32001, - "logprob": -19.171875, + "logprob": -20.046875, "text": "" }, { "id": 32001, - "logprob": -20.453125, + "logprob": -20.828125, "text": "" }, { "id": 32001, - "logprob": -21.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8828125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.1171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - 
"id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, + "logprob": -15.8046875, "text": "" }, { @@ -1397,7 +2191,12 @@ }, { "id": 32001, - "logprob": -19.46875, + "logprob": -19.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, "text": "" }, { @@ -1407,22 +2206,297 @@ }, { "id": 32001, - "logprob": -22.421875, + "logprob": -20.515625, "text": "" }, { "id": 32001, - "logprob": -20.9375, + "logprob": -19.171875, "text": "" }, { "id": 32001, - "logprob": -19.671875, + "logprob": -19.296875, "text": "" }, { "id": 32001, - "logprob": -20.890625, + "logprob": -16.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.2734375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.859375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.15625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.546875, + "text": "" + }, + { + "id": 
32001, + "logprob": -14.1953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.0, + "text": "" + }, + { + "id": 32001, + "logprob": -20.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -14.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.2988281, + "text": "" + }, + { + "id": 32001, + "logprob": -25.75, + "text": "" + }, + { + "id": 32001, + "logprob": -18.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.65625, + "text": 
"" + }, + { + "id": 32001, + "logprob": -18.203125, "text": "" }, { @@ -1432,12 +2506,732 @@ }, { "id": 32001, - "logprob": -17.5, + "logprob": -15.75, "text": "" }, { "id": 32001, - "logprob": -17.90625, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.25, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.4453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": 
-15.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32000, + "logprob": -2.7207031, + "text": "" + }, + { + "id": 32001, + "logprob": -23.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.890625, + "text": "" + }, + { + "id": 
32001, + "logprob": -19.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.28125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.6640625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.96875, + "text": "" + }, + { 
+ "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32000, + "logprob": -3.0917969, + "text": "" + }, + { + "id": 32001, + "logprob": -25.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": 
"" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.1328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.0, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, "text": "" }, { @@ -1452,37 +3246,37 @@ }, { "id": 32001, - "logprob": -16.40625, + "logprob": -16.375, "text": "" }, { "id": 32001, - "logprob": -18.453125, + "logprob": -18.4375, "text": "" }, { "id": 32001, - "logprob": -20.234375, + "logprob": -20.265625, "text": "" }, { "id": 32001, - "logprob": -22.28125, + "logprob": -22.296875, "text": "" }, { "id": 32001, - "logprob": -18.515625, + "logprob": -18.484375, "text": "" }, { "id": 32001, - "logprob": -15.4296875, + "logprob": -15.390625, "text": "" }, { "id": 32001, - "logprob": -19.765625, + "logprob": -19.75, "text": "" }, { @@ -1492,32 +3286,32 @@ }, { "id": 32001, - "logprob": -21.46875, + "logprob": -21.609375, "text": "" }, { "id": 32001, - "logprob": -18.875, + "logprob": -18.828125, "text": "" }, { "id": 32001, - "logprob": -20.859375, + "logprob": -20.828125, "text": "" }, { "id": 32001, - "logprob": -17.078125, + "logprob": -17.015625, "text": "" }, { "id": 32001, - "logprob": -16.4375, + 
"logprob": -16.40625, "text": "" }, { "id": 32001, - "logprob": -21.015625, + "logprob": -21.046875, "text": "" }, { @@ -1532,22 +3326,22 @@ }, { "id": 32001, - "logprob": -21.484375, + "logprob": -21.515625, "text": "" }, { "id": 32001, - "logprob": -20.015625, + "logprob": -20.0, "text": "" }, { "id": 32001, - "logprob": -18.84375, + "logprob": -18.78125, "text": "" }, { "id": 32001, - "logprob": -16.40625, + "logprob": -16.375, "text": "" }, { @@ -1557,57 +3351,57 @@ }, { "id": 32001, - "logprob": -16.65625, + "logprob": -16.703125, "text": "" }, { "id": 32001, - "logprob": -13.6328125, + "logprob": -13.625, "text": "" }, { "id": 32001, - "logprob": -15.4140625, + "logprob": -15.375, "text": "" }, { "id": 32001, - "logprob": -17.546875, + "logprob": -17.515625, "text": "" }, { "id": 32001, - "logprob": -21.859375, + "logprob": -21.921875, "text": "" }, { "id": 32001, - "logprob": -15.65625, + "logprob": -15.640625, "text": "" }, { "id": 32001, - "logprob": -16.484375, + "logprob": -16.46875, "text": "" }, { "id": 32001, - "logprob": -16.359375, + "logprob": -16.421875, "text": "" }, { "id": 32001, - "logprob": -19.9375, + "logprob": -19.890625, "text": "" }, { "id": 32001, - "logprob": -17.875, + "logprob": -17.890625, "text": "" }, { "id": 32001, - "logprob": -17.453125, + "logprob": -17.40625, "text": "" }, { @@ -1617,72 +3411,72 @@ }, { "id": 32001, - "logprob": -19.171875, + "logprob": -19.1875, "text": "" }, { "id": 32001, - "logprob": -15.9921875, + "logprob": -15.9609375, "text": "" }, { "id": 32000, - "logprob": -2.0429688, + "logprob": -2.0332031, "text": "" }, { "id": 12018, - "logprob": -12.03125, + "logprob": -12.078125, "text": "Write" }, { "id": 528, - "logprob": -10.25, + "logprob": -10.109375, "text": "me" }, { "id": 264, - "logprob": -0.10437012, + "logprob": -0.103515625, "text": "a" }, { "id": 2485, - "logprob": -4.5742188, + "logprob": -4.5664062, "text": "short" }, { "id": 2838, - "logprob": -0.2277832, + "logprob": -0.23864746, "text": 
"story" }, { "id": 32002, - "logprob": -10.84375, + "logprob": -10.9609375, "text": "" }, { "id": 259, - "logprob": -20.1875, + "logprob": -20.34375, "text": " " }, { "id": 13, - "logprob": -8.7578125, + "logprob": -8.5546875, "text": "\n" }, { "id": 7226, - "logprob": -10.421875, + "logprob": -10.484375, "text": "Ass" }, { "id": 11143, - "logprob": -13.640625, + "logprob": -13.6015625, "text": "istant" }, { "id": 28747, - "logprob": -0.005619049, + "logprob": -0.008308411, "text": ":" } ], @@ -1690,61 +3484,61 @@ "tokens": [ { "id": 330, - "logprob": -0.12939453, + "logprob": -0.09448242, "special": false, "text": " A" }, { "id": 13088, - "logprob": -0.6660156, + "logprob": -0.6743164, "special": false, "text": " chicken" }, { "id": 349, - "logprob": -0.29638672, + "logprob": -0.31201172, "special": false, "text": " is" }, { "id": 6398, - "logprob": -0.05960083, + "logprob": -0.051635742, "special": false, "text": " sitting" }, { "id": 356, - "logprob": -0.26953125, + "logprob": -0.34033203, "special": false, "text": " on" }, { "id": 264, - "logprob": -0.1427002, + "logprob": -0.1194458, "special": false, "text": " a" }, { "id": 17972, - "logprob": -0.040649414, + "logprob": -0.032562256, "special": false, "text": " pile" }, { "id": 302, - "logprob": -0.0002708435, + "logprob": -0.00018763542, "special": false, "text": " of" }, { "id": 2445, - "logprob": -0.09429932, + "logprob": -0.07122803, "special": false, "text": " money" }, { "id": 28723, - "logprob": -0.006931305, + "logprob": -0.0041007996, "special": false, "text": "." 
} @@ -1766,12 +3560,12 @@ }, { "id": 1247, - "logprob": -5.234375, + "logprob": -5.2382812, "text": "User" }, { "id": 28747, - "logprob": -6.9648438, + "logprob": -6.9492188, "text": ":" }, { @@ -1781,12 +3575,12 @@ }, { "id": 32001, - "logprob": -18.96875, + "logprob": -18.984375, "text": "" }, { "id": 32001, - "logprob": -18.1875, + "logprob": -18.171875, "text": "" }, { @@ -1811,7 +3605,7 @@ }, { "id": 32001, - "logprob": -20.234375, + "logprob": -20.21875, "text": "" }, { @@ -1821,7 +3615,7 @@ }, { "id": 32001, - "logprob": -19.828125, + "logprob": -19.84375, "text": "" }, { @@ -1856,22 +3650,22 @@ }, { "id": 32001, - "logprob": -21.015625, + "logprob": -21.03125, "text": "" }, { "id": 32001, - "logprob": -20.4375, + "logprob": -20.421875, "text": "" }, { "id": 32001, - "logprob": -16.015625, + "logprob": -16.03125, "text": "" }, { "id": 32001, - "logprob": -19.0625, + "logprob": -19.046875, "text": "" }, { @@ -1886,7 +3680,7 @@ }, { "id": 32001, - "logprob": -20.40625, + "logprob": -20.421875, "text": "" }, { @@ -1896,7 +3690,7 @@ }, { "id": 32001, - "logprob": -20.84375, + "logprob": -20.875, "text": "" }, { @@ -1906,12 +3700,12 @@ }, { "id": 32001, - "logprob": -19.984375, + "logprob": -19.96875, "text": "" }, { "id": 32001, - "logprob": -18.21875, + "logprob": -18.234375, "text": "" }, { @@ -1921,7 +3715,7 @@ }, { "id": 32001, - "logprob": -21.203125, + "logprob": -21.1875, "text": "" }, { @@ -1936,7 +3730,7 @@ }, { "id": 32001, - "logprob": -18.984375, + "logprob": -19.03125, "text": "" }, { @@ -1951,17 +3745,17 @@ }, { "id": 32001, - "logprob": -18.0, + "logprob": -17.96875, "text": "" }, { "id": 32001, - "logprob": -18.828125, + "logprob": -18.859375, "text": "" }, { "id": 32001, - "logprob": -17.9375, + "logprob": -17.921875, "text": "" }, { @@ -1971,12 +3765,12 @@ }, { "id": 32001, - "logprob": -18.640625, + "logprob": -18.65625, "text": "" }, { "id": 32001, - "logprob": -20.125, + "logprob": -20.140625, "text": "" }, { @@ -1996,12 +3790,12 @@ }, { 
"id": 32001, - "logprob": -23.203125, + "logprob": -23.21875, "text": "" }, { "id": 32001, - "logprob": -17.4375, + "logprob": -17.421875, "text": "" }, { @@ -2011,7 +3805,7 @@ }, { "id": 32001, - "logprob": -23.015625, + "logprob": -23.0, "text": "" }, { @@ -2036,7 +3830,7 @@ }, { "id": 32001, - "logprob": -18.40625, + "logprob": -18.421875, "text": "" }, { @@ -2046,7 +3840,7 @@ }, { "id": 32001, - "logprob": -18.34375, + "logprob": -18.328125, "text": "" }, { @@ -2056,7 +3850,7 @@ }, { "id": 32001, - "logprob": -18.671875, + "logprob": -18.65625, "text": "" }, { @@ -2071,7 +3865,7 @@ }, { "id": 32001, - "logprob": -18.1875, + "logprob": -18.15625, "text": "" }, { @@ -2081,7 +3875,7 @@ }, { "id": 32001, - "logprob": -20.140625, + "logprob": -20.15625, "text": "" }, { @@ -2089,1643 +3883,29 @@ "logprob": -18.96875, "text": "" }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.25, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - 
{ - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4140625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.7265625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -14.2421875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.234375, - "text": "" - }, - { - "id": 32001, - "logprob": 
-17.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8515625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.203125, - "text": "" - }, { "id": 32001, "logprob": -18.703125, "text": "" }, - { - "id": 32001, - "logprob": -19.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.234375, - "text": "" - }, - { - "id": 32001, - 
"logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -20.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.5, - "text": "" - }, - { - "id": 32001, - "logprob": -20.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.09375, - "text": "" - }, - { - 
"id": 32001, - "logprob": -16.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.6875, - "text": "" - }, - { - "id": 32001, - "logprob": -22.625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, - "text": "" - }, - { - "id": 32001, - "logprob": -21.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.421875, - "text": "" - 
}, - { - "id": 32001, - "logprob": -21.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.109375, - "text": "" - }, - { - "id": 32001, - 
"logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.1171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -22.421875, - "text": "" - }, - { - "id": 
32001, - "logprob": -20.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.5, - "text": "" - }, - { - "id": 32001, - "logprob": -17.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -22.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.75, - "text": "" - }, - { - "id": 32001, - "logprob": -16.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -22.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4296875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.6484375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -13.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.546875, - 
"text": "" - }, - { - "id": 32001, - "logprob": -21.875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9921875, - "text": "" - }, - { - "id": 32000, - "logprob": -2.0429688, - "text": "" - }, - { - "id": 12018, - "logprob": -12.03125, - "text": "Write" - }, - { - "id": 528, - "logprob": -10.2578125, - "text": "me" - }, - { - "id": 264, - "logprob": -0.10418701, - "text": "a" - }, - { - "id": 2485, - "logprob": -4.5664062, - "text": "short" - }, - { - "id": 2838, - "logprob": -0.22741699, - "text": "story" - }, - { - "id": 32002, - "logprob": -10.8515625, - "text": "" - }, - { - "id": 259, - "logprob": -20.203125, - "text": " " - }, - { - "id": 13, - "logprob": -8.7421875, - "text": "\n" - }, - { - "id": 7226, - "logprob": -10.4140625, - "text": "Ass" - }, - { - "id": 11143, - "logprob": -13.6328125, - "text": "istant" - }, - { - "id": 28747, - "logprob": -0.005580902, - "text": ":" - } - ], - "seed": null, - "tokens": [ - { - "id": 330, - "logprob": -0.1295166, - "special": false, - "text": " A" - }, - { - "id": 13088, - "logprob": -0.6669922, - "special": false, - "text": " chicken" - }, - { - "id": 349, - "logprob": -0.29711914, - "special": false, - "text": " is" - }, - { - "id": 6398, - "logprob": -0.059936523, - "special": false, - "text": " sitting" - }, - { - "id": 356, - "logprob": -0.27124023, - "special": false, - "text": " on" - }, - { - "id": 264, - "logprob": -0.140625, - "special": false, - "text": " a" - }, - { - "id": 17972, - "logprob": -0.04058838, - 
"special": false, - "text": " pile" - }, - { - "id": 302, - "logprob": -0.00027012825, - "special": false, - "text": " of" - }, - { - "id": 2445, - "logprob": -0.09503174, - "special": false, - "text": " money" - }, - { - "id": 28723, - "logprob": -0.006942749, - "special": false, - "text": "." - } - ], - "top_tokens": null - }, - "generated_text": " A chicken is sitting on a pile of money." - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1247, - "logprob": -5.2460938, - "text": "User" - }, - { - "id": 28747, - "logprob": -6.9570312, - "text": ":" - }, - { - "id": 32000, - "logprob": -16.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -23.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -22.765625, - "text": "" - }, - { - "id": 
32001, - "logprob": -23.625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.5, - "text": "" - }, - { - "id": 32001, - "logprob": -19.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -23.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.53125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.328125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, { "id": 32001, "logprob": -17.921875, "text": "" }, + { + "id": 32000, + "logprob": -3.015625, + "text": "" + }, { "id": 32001, - "logprob": -19.1875, + "logprob": -22.109375, "text": "" }, { "id": 32001, - "logprob": -18.640625, + "logprob": -18.96875, "text": "" }, { @@ -3733,76 +3913,6 @@ "logprob": -20.125, "text": "" }, - { - "id": 32001, - "logprob": -19.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -23.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -23.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -23.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.75, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.046875, - "text": "" - }, - { - "id": 32001, - "logprob": 
-18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.34375, - "text": "" - }, { "id": 32001, "logprob": -17.125, @@ -3810,932 +3920,12 @@ }, { "id": 32001, - "logprob": -18.671875, + "logprob": -17.8125, "text": "" }, { "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.25, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8359375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 
32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4140625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.7265625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -14.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8671875, - 
"text": "" - }, - { - "id": 32001, - "logprob": -18.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8515625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.0, - "text": "" - }, - { - "id": 32001, - "logprob": -17.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": 
-20.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - 
"id": 32001, - "logprob": -19.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.6875, - "text": "" - }, - { - "id": 32001, - "logprob": -22.625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - 
"text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.765625, + "logprob": -19.3125, "text": "" }, { @@ -4743,1385 +3933,11 @@ "logprob": -19.65625, "text": "" }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0625, - "text": "" - 
}, - { - "id": 32001, - "logprob": -16.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.75, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.1171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -22.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, - 
"text": "" - }, - { - "id": 32001, - "logprob": -16.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.5, - "text": "" - }, - { - "id": 32001, - "logprob": -17.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -22.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.75, - "text": "" - }, - { - "id": 32001, - "logprob": -16.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -22.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.53125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4296875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.6484375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0, - "text": "" - }, - { - "id": 32001, - "logprob": -21.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.890625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -13.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4140625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.65625, - "text": "" - }, - { - "id": 32001, - 
"logprob": -16.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9921875, - "text": "" - }, - { - "id": 32000, - "logprob": -2.0429688, - "text": "" - }, - { - "id": 12018, - "logprob": -12.0390625, - "text": "Write" - }, - { - "id": 528, - "logprob": -10.25, - "text": "me" - }, - { - "id": 264, - "logprob": -0.10443115, - "text": "a" - }, - { - "id": 2485, - "logprob": -4.5742188, - "text": "short" - }, - { - "id": 2838, - "logprob": -0.22729492, - "text": "story" - }, - { - "id": 32002, - "logprob": -10.84375, - "text": "" - }, - { - "id": 259, - "logprob": -20.1875, - "text": " " - }, - { - "id": 13, - "logprob": -8.7578125, - "text": "\n" - }, - { - "id": 7226, - "logprob": -10.4140625, - "text": "Ass" - }, - { - "id": 11143, - "logprob": -13.6328125, - "text": "istant" - }, - { - "id": 28747, - "logprob": -0.0056533813, - "text": ":" - } - ], - "seed": null, - "tokens": [ - { - "id": 330, - "logprob": -0.12963867, - "special": false, - "text": " A" - }, - { - "id": 13088, - "logprob": -0.6660156, - "special": false, - "text": " chicken" - }, - { - "id": 349, - "logprob": -0.29516602, - "special": false, - "text": " is" - }, - { - "id": 6398, - "logprob": -0.060028076, - "special": false, - "text": " sitting" - }, - { - "id": 356, - "logprob": -0.27075195, - "special": false, - "text": " on" - }, - { - "id": 264, - "logprob": -0.1427002, - "special": false, - "text": " a" - }, - { - "id": 17972, - "logprob": -0.04067993, - "special": false, - "text": " pile" - }, - { - "id": 302, - "logprob": -0.000269413, - "special": false, - "text": " of" - }, - { - "id": 2445, - "logprob": -0.09387207, 
- "special": false, - "text": " money" - }, - { - "id": 28723, - "logprob": -0.0069236755, - "special": false, - "text": "." - } - ], - "top_tokens": null - }, - "generated_text": " A chicken is sitting on a pile of money." - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1247, - "logprob": -5.2421875, - "text": "User" - }, - { - "id": 28747, - "logprob": -6.9570312, - "text": ":" - }, - { - "id": 32000, - "logprob": -16.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -23.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -22.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -23.625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.421875, - "text": "" - }, - { - 
"id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.5, - "text": "" - }, - { - "id": 32001, - "logprob": -19.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -23.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.53125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.328125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -23.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -23.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -23.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.75, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.34375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.671875, - "text": 
"" - }, - { - "id": 32001, - "logprob": -18.875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0, - "text": "" - }, - { - "id": 32001, - "logprob": -18.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.25, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.921875, - "text": "" - }, { "id": 32001, "logprob": -16.3125, "text": "" }, - { - "id": 32001, - "logprob": -19.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.953125, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8359375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.03125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.96875, - "text": "" 
- }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -15.4140625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.7265625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.5625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -14.2421875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.671875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.578125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.25, - "text": "" - }, - { - "id": 32001, - "logprob": -17.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8671875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - 
"logprob": -16.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.640625, - "text": "" - }, - { - "id": 32001, - "logprob": -14.8515625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.25, - "text": "" - }, - { - "id": 32001, - "logprob": -19.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.71875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.5, - "text": "" - }, - { - "id": 32001, - "logprob": -18.296875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -16.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.515625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -20.25, - "text": "" - }, - { - "id": 32001, - "logprob": -20.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.609375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.90625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.28125, - "text": "" - }, - { - "id": 
32001, - "logprob": -19.5, - "text": "" - }, - { - "id": 32001, - "logprob": -20.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.546875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.484375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.265625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -15.9453125, - "text": "" - }, - { - "id": 32001, - "logprob": -21.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.515625, - "text": "" - }, { "id": 32001, "logprob": -19.375, @@ -6129,152 +3945,27 @@ }, { "id": 32001, - "logprob": -17.796875, + "logprob": -20.046875, "text": "" }, { "id": 32001, - "logprob": -16.03125, + "logprob": -20.828125, "text": "" }, { "id": 32001, - "logprob": -18.671875, + "logprob": -15.8046875, "text": "" }, { "id": 32001, - "logprob": -20.15625, + "logprob": -16.25, "text": "" }, { "id": 32001, - "logprob": -20.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.84375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.78125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.234375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.9375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.703125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.15625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.46875, - "text": 
"" - }, - { - "id": 32001, - "logprob": -19.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.203125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.6875, - "text": "" - }, - { - "id": 32001, - "logprob": -22.625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -18.46875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.546875, + "logprob": -19.953125, "text": "" }, { @@ -6284,37 +3975,42 @@ }, { "id": 32001, - "logprob": -16.453125, + "logprob": -21.59375, "text": "" }, { "id": 32001, - "logprob": -21.09375, + "logprob": -20.515625, "text": "" }, { "id": 32001, - "logprob": -19.5625, + "logprob": -19.171875, "text": "" }, { "id": 32001, - "logprob": -19.15625, + "logprob": -19.296875, "text": "" }, { "id": 32001, - "logprob": -16.171875, + "logprob": -16.71875, "text": "" }, { "id": 32001, - "logprob": -17.671875, + "logprob": -20.46875, "text": "" }, { "id": 32001, - "logprob": -18.859375, + "logprob": -21.125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.40625, "text": "" }, { @@ -6324,37 +4020,77 @@ }, { "id": 32001, - "logprob": -21.8125, + "logprob": -17.765625, "text": "" }, { "id": 32001, - "logprob": -19.96875, + "logprob": -20.328125, "text": "" }, { "id": 32001, - "logprob": -19.046875, + "logprob": -15.2734375, "text": "" }, { "id": 32001, - "logprob": -19.78125, + "logprob": -18.84375, "text": "" }, { "id": 32001, - "logprob": -19.421875, + "logprob": -17.875, "text": "" }, { "id": 32001, - "logprob": -21.21875, + "logprob": -15.578125, "text": "" }, { "id": 32001, - "logprob": -21.515625, + "logprob": -18.3125, + "text": "" + }, + { + "id": 32001, + 
"logprob": -20.625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.859375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.15625, "text": "" }, { @@ -6364,37 +4100,12 @@ }, { "id": 32001, - "logprob": -20.734375, + "logprob": -19.546875, "text": "" }, { "id": 32001, - "logprob": -19.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -19.828125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.359375, - "text": "" - }, - { - "id": 32001, - "logprob": -17.75, + "logprob": -14.1953125, "text": "" }, { @@ -6404,182 +4115,42 @@ }, { "id": 32001, - "logprob": -18.765625, + "logprob": -18.1875, "text": "" }, { "id": 32001, - "logprob": -20.453125, + "logprob": -17.421875, "text": "" }, { "id": 32001, - "logprob": -19.890625, + "logprob": -20.421875, "text": "" }, { "id": 32001, - "logprob": -16.015625, + "logprob": -20.0, "text": "" }, { "id": 32001, - "logprob": -18.90625, + "logprob": -20.359375, "text": "" }, { "id": 32001, - "logprob": -15.953125, + "logprob": -18.03125, "text": "" }, { "id": 32001, - "logprob": -21.46875, + "logprob": -17.203125, "text": "" }, { "id": 32001, - "logprob": -19.984375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.875, - "text": "" - }, - { - "id": 32001, - "logprob": -18.859375, - "text": "" - }, - { - "id": 32001, - "logprob": -16.046875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.140625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.6875, - 
"text": "" - }, - { - "id": 32001, - "logprob": -21.453125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.1875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.765625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.65625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - "logprob": -17.109375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.171875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.4375, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0625, - "text": "" - }, - { - "id": 32001, - "logprob": -16.734375, - "text": "" - }, - { - "id": 32001, - "logprob": -19.21875, - "text": "" - }, - { - "id": 32001, - "logprob": -16.421875, - "text": "" - }, - { - "id": 32001, - "logprob": -20.015625, - "text": "" - }, - { - "id": 32001, - "logprob": -17.796875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.3125, - "text": "" - }, - { - "id": 32001, - "logprob": -20.390625, - "text": "" - }, - { - "id": 32001, - "logprob": -19.28125, - "text": "" - }, - { - "id": 32001, - "logprob": -18.59375, - "text": "" - }, - { - "id": 32001, - "logprob": -18.8125, - "text": "" - }, - { - "id": 32001, - "logprob": -19.09375, - "text": "" - }, - { - "id": 32001, - "logprob": -20.890625, + "logprob": -16.84375, "text": "" }, { @@ -6589,72 +4160,32 @@ }, { "id": 32001, - "logprob": -18.75, + "logprob": -15.71875, "text": "" }, { "id": 32001, - "logprob": -18.90625, + "logprob": -18.203125, "text": "" }, { "id": 32001, - "logprob": -21.375, + "logprob": -18.4375, "text": "" }, { "id": 32001, - "logprob": -16.640625, + "logprob": -18.46875, "text": "" }, { "id": 32001, - "logprob": -20.859375, + "logprob": -17.3125, "text": "" }, { "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -15.875, - "text": "" - }, - { - "id": 32001, - "logprob": -15.1171875, - "text": "" - }, - { - "id": 32001, - "logprob": -17.078125, - "text": "" - }, - { - "id": 32001, - 
"logprob": -18.921875, - "text": "" - }, - { - "id": 32001, - "logprob": -21.40625, - "text": "" - }, - { - "id": 32001, - "logprob": -21.0, - "text": "" - }, - { - "id": 32001, - "logprob": -20.75, - "text": "" - }, - { - "id": 32001, - "logprob": -16.25, + "logprob": -16.265625, "text": "" }, { @@ -6664,37 +4195,47 @@ }, { "id": 32001, - "logprob": -21.59375, + "logprob": -14.734375, "text": "" }, { "id": 32001, - "logprob": -22.421875, + "logprob": -20.6875, "text": "" }, { "id": 32001, - "logprob": -20.9375, + "logprob": -20.21875, "text": "" }, { "id": 32001, - "logprob": -19.671875, + "logprob": -18.359375, "text": "" }, { "id": 32001, - "logprob": -20.890625, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.2988281, + "text": "" + }, + { + "id": 32001, + "logprob": -25.75, "text": "" }, { "id": 32001, - "logprob": -16.921875, + "logprob": -18.421875, "text": "" }, { "id": 32001, - "logprob": -17.5, + "logprob": -19.265625, "text": "" }, { @@ -6704,42 +4245,77 @@ }, { "id": 32001, - "logprob": -22.1875, + "logprob": -17.203125, "text": "" }, { "id": 32001, - "logprob": -18.734375, + "logprob": -20.140625, "text": "" }, { "id": 32001, - "logprob": -16.40625, + "logprob": -17.96875, "text": "" }, { "id": 32001, - "logprob": -18.453125, + "logprob": -16.453125, "text": "" }, { "id": 32001, - "logprob": -20.234375, + "logprob": -19.65625, "text": "" }, { "id": 32001, - "logprob": -22.28125, + "logprob": -18.203125, "text": "" }, { "id": 32001, - "logprob": -18.515625, + "logprob": -16.921875, "text": "" }, { "id": 32001, - "logprob": -15.4296875, + "logprob": -15.75, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.25, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + 
}, + { + "id": 32001, + "logprob": -18.984375, "text": "" }, { @@ -6749,27 +4325,187 @@ }, { "id": 32001, - "logprob": -14.6484375, + "logprob": -19.890625, "text": "" }, { "id": 32001, - "logprob": -21.46875, + "logprob": -20.421875, "text": "" }, { "id": 32001, - "logprob": -18.875, + "logprob": -19.34375, "text": "" }, { "id": 32001, - "logprob": -20.859375, + "logprob": -20.140625, "text": "" }, { "id": 32001, - "logprob": -17.078125, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.4453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + 
"text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.703125, "text": "" }, { @@ -6777,11 +4513,581 @@ "logprob": -16.4375, "text": "" }, + { + "id": 32001, + "logprob": -19.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32000, + "logprob": -2.7207031, + "text": "" + }, + { + "id": 32001, + "logprob": -23.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.203125, + "text": "" + }, { "id": 32001, "logprob": -21.015625, "text": "" }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.28125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + 
"text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.6640625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": 
-17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32000, + "logprob": -3.0917969, + "text": "" + }, + { + "id": 32001, + "logprob": -25.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.1328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 
32001, + "logprob": -19.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.0, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -22.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.75, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -22.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.75, + "text": "" + }, + { + "id": 32001, + "logprob": -14.6484375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.046875, + "text": "" + }, { "id": 32001, "logprob": -21.234375, @@ -6794,22 +5100,22 @@ }, { "id": 32001, - "logprob": -21.484375, + "logprob": -21.515625, "text": "" }, { "id": 32001, - "logprob": -20.015625, + "logprob": -20.0, "text": "" }, { "id": 32001, - "logprob": -18.84375, + "logprob": -18.78125, "text": "" }, { "id": 32001, - "logprob": -16.421875, + "logprob": -16.375, "text": "" }, { @@ -6819,57 +5125,57 @@ }, { "id": 32001, - "logprob": -16.65625, + "logprob": -16.703125, "text": "" }, { "id": 
32001, - "logprob": -13.640625, + "logprob": -13.625, "text": "" }, { "id": 32001, - "logprob": -15.4140625, + "logprob": -15.375, "text": "" }, { "id": 32001, - "logprob": -17.546875, + "logprob": -17.515625, "text": "" }, { "id": 32001, - "logprob": -21.859375, + "logprob": -21.921875, "text": "" }, { "id": 32001, - "logprob": -15.65625, + "logprob": -15.640625, "text": "" }, { "id": 32001, - "logprob": -16.484375, + "logprob": -16.46875, "text": "" }, { "id": 32001, - "logprob": -16.359375, + "logprob": -16.421875, "text": "" }, { "id": 32001, - "logprob": -19.9375, + "logprob": -19.890625, "text": "" }, { "id": 32001, - "logprob": -17.875, + "logprob": -17.890625, "text": "" }, { "id": 32001, - "logprob": -17.453125, + "logprob": -17.40625, "text": "" }, { @@ -6879,72 +5185,72 @@ }, { "id": 32001, - "logprob": -19.171875, + "logprob": -19.1875, "text": "" }, { "id": 32001, - "logprob": -15.9921875, + "logprob": -15.9609375, "text": "" }, { "id": 32000, - "logprob": -2.0429688, + "logprob": -2.0332031, "text": "" }, { "id": 12018, - "logprob": -12.03125, + "logprob": -12.078125, "text": "Write" }, { "id": 528, - "logprob": -10.25, + "logprob": -10.109375, "text": "me" }, { "id": 264, - "logprob": -0.10437012, + "logprob": -0.103515625, "text": "a" }, { "id": 2485, - "logprob": -4.578125, + "logprob": -4.5664062, "text": "short" }, { "id": 2838, - "logprob": -0.22924805, + "logprob": -0.23864746, "text": "story" }, { "id": 32002, - "logprob": -10.84375, + "logprob": -10.9609375, "text": "" }, { "id": 259, - "logprob": -20.171875, + "logprob": -20.34375, "text": " " }, { "id": 13, - "logprob": -8.765625, + "logprob": -8.5546875, "text": "\n" }, { "id": 7226, - "logprob": -10.4140625, + "logprob": -10.484375, "text": "Ass" }, { "id": 11143, - "logprob": -13.640625, + "logprob": -13.6015625, "text": "istant" }, { "id": 28747, - "logprob": -0.005744934, + "logprob": -0.008308411, "text": ":" } ], @@ -6952,61 +5258,1835 @@ "tokens": [ { "id": 330, - "logprob": 
-0.12976074, + "logprob": -0.09448242, "special": false, "text": " A" }, { "id": 13088, - "logprob": -0.66308594, + "logprob": -0.6743164, "special": false, "text": " chicken" }, { "id": 349, - "logprob": -0.29541016, + "logprob": -0.31201172, "special": false, "text": " is" }, { "id": 6398, - "logprob": -0.05996704, + "logprob": -0.051635742, "special": false, "text": " sitting" }, { "id": 356, - "logprob": -0.27075195, + "logprob": -0.34033203, "special": false, "text": " on" }, { "id": 264, - "logprob": -0.14160156, + "logprob": -0.1194458, "special": false, "text": " a" }, { "id": 17972, - "logprob": -0.040863037, + "logprob": -0.032562256, "special": false, "text": " pile" }, { "id": 302, - "logprob": -0.00027036667, + "logprob": -0.00018787384, "special": false, "text": " of" }, { "id": 2445, - "logprob": -0.093322754, + "logprob": -0.07122803, "special": false, "text": " money" }, { "id": 28723, - "logprob": -0.006931305, + "logprob": -0.0041007996, + "special": false, + "text": "." + } + ], + "top_tokens": null + }, + "generated_text": " A chicken is sitting on a pile of money." 
+ }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1247, + "logprob": -5.2382812, + "text": "User" + }, + { + "id": 28747, + "logprob": -6.9492188, + "text": ":" + }, + { + "id": 32000, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -23.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.28125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -22.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -23.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.234375, 
+ "text": "" + }, + { + "id": 32001, + "logprob": -23.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.78125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.859375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -14.8828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -23.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -23.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -23.0, + "text": "" + }, + { + "id": 32001, + "logprob": -19.75, + "text": "" + }, + { + "id": 32001, + "logprob": -17.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.0, + "text": "" + }, + { + "id": 32001, + "logprob": -18.15625, + "text": "" + }, + { + "id": 32001, + "logprob": 
-18.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.15625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -22.109375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.8046875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.2734375, + "text": "" + }, + { + "id": 32001, 
+ "logprob": -18.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.859375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.15625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.546875, + "text": "" + }, + { + "id": 32001, + "logprob": -14.1953125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.0, + "text": "" + }, + { + "id": 32001, + "logprob": -20.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.84375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -14.734375, + "text": "" + }, + { 
+ "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32000, + "logprob": -3.2988281, + "text": "" + }, + { + "id": 32001, + "logprob": -25.75, + "text": "" + }, + { + "id": 32001, + "logprob": -18.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.65625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.75, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.25, + "text": "" + }, + { + "id": 32001, + "logprob": -16.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.625, + "text": 
"" + }, + { + "id": 32001, + "logprob": -17.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.953125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.4453125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.21875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.359375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + 
"logprob": -17.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.5625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32000, + "logprob": -2.7207031, + "text": "" + }, + { + "id": 32001, + "logprob": -23.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.078125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.28125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.5, + "text": "" + }, + { + "id": 32001, + "logprob": -19.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.203125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.03125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.328125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.9375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.46875, + "text": "" + }, + { + 
"id": 32001, + "logprob": -18.0, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.765625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.6640625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.3125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -19.671875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.96875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.8125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.09375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -18.875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.171875, + "text": "" + }, 
+ { + "id": 32001, + "logprob": -20.34375, + "text": "" + }, + { + "id": 32001, + "logprob": -19.171875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.578125, + "text": "" + }, + { + "id": 32000, + "logprob": -3.0917969, + "text": "" + }, + { + "id": 32001, + "logprob": -25.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -20.6875, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.71875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.453125, + "text": "" + }, + { + "id": 32001, + "logprob": -15.796875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.1328125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.125, + "text": "" + }, + { + "id": 32001, + "logprob": -18.90625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.734375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.25, + "text": "" + }, + { + "id": 32001, + "logprob": -19.5, + "text": "" + }, + { + "id": 32001, + "logprob": -21.59375, + "text": "" + }, + { + "id": 32001, + "logprob": -22.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -21.0, + "text": "" + }, + { + "id": 32001, + "logprob": -16.984375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.53125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -22.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.75, + 
"text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.4375, + "text": "" + }, + { + "id": 32001, + "logprob": -20.265625, + "text": "" + }, + { + "id": 32001, + "logprob": -22.296875, + "text": "" + }, + { + "id": 32001, + "logprob": -18.484375, + "text": "" + }, + { + "id": 32001, + "logprob": -15.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.75, + "text": "" + }, + { + "id": 32001, + "logprob": -14.6484375, + "text": "" + }, + { + "id": 32001, + "logprob": -21.609375, + "text": "" + }, + { + "id": 32001, + "logprob": -18.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -20.828125, + "text": "" + }, + { + "id": 32001, + "logprob": -17.015625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.40625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.046875, + "text": "" + }, + { + "id": 32001, + "logprob": -21.234375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.140625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -20.0, + "text": "" + }, + { + "id": 32001, + "logprob": -18.78125, + "text": "" + }, + { + "id": 32001, + "logprob": -16.375, + "text": "" + }, + { + "id": 32001, + "logprob": -16.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.703125, + "text": "" + }, + { + "id": 32001, + "logprob": -13.625, + "text": "" + }, + { + "id": 32001, + "logprob": -15.375, + "text": "" + }, + { + "id": 32001, + "logprob": -17.515625, + "text": "" + }, + { + "id": 32001, + "logprob": -21.921875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.640625, + "text": "" + }, + { + "id": 32001, + "logprob": -16.46875, + "text": "" + }, + { + "id": 32001, + "logprob": -16.421875, + "text": "" + }, + { + "id": 32001, + "logprob": -19.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.890625, + "text": "" + }, + { + "id": 32001, + "logprob": -17.40625, + "text": "" + }, + { + "id": 32001, + 
"logprob": -20.390625, + "text": "" + }, + { + "id": 32001, + "logprob": -19.1875, + "text": "" + }, + { + "id": 32001, + "logprob": -15.9609375, + "text": "" + }, + { + "id": 32000, + "logprob": -2.0332031, + "text": "" + }, + { + "id": 12018, + "logprob": -12.078125, + "text": "Write" + }, + { + "id": 528, + "logprob": -10.109375, + "text": "me" + }, + { + "id": 264, + "logprob": -0.103515625, + "text": "a" + }, + { + "id": 2485, + "logprob": -4.5664062, + "text": "short" + }, + { + "id": 2838, + "logprob": -0.23864746, + "text": "story" + }, + { + "id": 32002, + "logprob": -10.9609375, + "text": "" + }, + { + "id": 259, + "logprob": -20.34375, + "text": " " + }, + { + "id": 13, + "logprob": -8.5546875, + "text": "\n" + }, + { + "id": 7226, + "logprob": -10.484375, + "text": "Ass" + }, + { + "id": 11143, + "logprob": -13.6015625, + "text": "istant" + }, + { + "id": 28747, + "logprob": -0.008308411, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 330, + "logprob": -0.09448242, + "special": false, + "text": " A" + }, + { + "id": 13088, + "logprob": -0.6743164, + "special": false, + "text": " chicken" + }, + { + "id": 349, + "logprob": -0.31201172, + "special": false, + "text": " is" + }, + { + "id": 6398, + "logprob": -0.051635742, + "special": false, + "text": " sitting" + }, + { + "id": 356, + "logprob": -0.34033203, + "special": false, + "text": " on" + }, + { + "id": 264, + "logprob": -0.1194458, + "special": false, + "text": " a" + }, + { + "id": 17972, + "logprob": -0.032562256, + "special": false, + "text": " pile" + }, + { + "id": 302, + "logprob": -0.00018763542, + "special": false, + "text": " of" + }, + { + "id": 2445, + "logprob": -0.07122803, + "special": false, + "text": " money" + }, + { + "id": 28723, + "logprob": -0.0041007996, "special": false, "text": "." 
} diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json index a3b18d0a..da2ac897 100644 --- a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json +++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_next_simple.json @@ -8,61 +8,61 @@ "tokens": [ { "id": 330, - "logprob": -0.13000488, + "logprob": -0.08660889, "special": false, "text": " A" }, { "id": 13088, - "logprob": -0.6713867, + "logprob": -0.7089844, "special": false, "text": " chicken" }, { "id": 349, - "logprob": -0.2980957, + "logprob": -0.32885742, "special": false, "text": " is" }, { "id": 6398, - "logprob": -0.060638428, + "logprob": -0.05126953, "special": false, "text": " sitting" }, { "id": 356, - "logprob": -0.27319336, + "logprob": -0.35229492, "special": false, "text": " on" }, { "id": 264, - "logprob": -0.140625, + "logprob": -0.12561035, "special": false, "text": " a" }, { "id": 17972, - "logprob": -0.040405273, + "logprob": -0.038085938, "special": false, "text": " pile" }, { "id": 302, - "logprob": -0.0002708435, + "logprob": -0.00018656254, "special": false, "text": " of" }, { "id": 2445, - "logprob": -0.095336914, + "logprob": -0.07293701, "special": false, "text": " money" }, { "id": 28723, - "logprob": -0.0068359375, + "logprob": -0.004852295, "special": false, "text": "." 
} diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json index 86c95b29..bf2dc5a1 100644 --- a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json +++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json @@ -8,115 +8,115 @@ "tokens": [ { "id": 415, - "logprob": -0.04421997, + "logprob": -0.039886475, "special": false, "text": " The" }, { "id": 12072, - "logprob": -0.13500977, + "logprob": -0.1430664, "special": false, "text": " cow" }, { "id": 349, - "logprob": -0.06750488, + "logprob": -0.056488037, "special": false, "text": " is" }, { "id": 6328, - "logprob": -0.6352539, + "logprob": -0.6855469, "special": false, "text": " standing" }, { "id": 356, - "logprob": -0.16186523, + "logprob": -0.1685791, "special": false, "text": " on" }, { "id": 272, - "logprob": -0.5078125, + "logprob": -0.50097656, "special": false, "text": " the" }, { "id": 10305, - "logprob": -0.017913818, + "logprob": -0.017303467, "special": false, "text": " beach" }, { "id": 304, - "logprob": -1.5205078, + "logprob": -1.3564453, "special": false, "text": " and" }, { "id": 272, - "logprob": -0.029174805, + "logprob": -0.017868042, "special": false, "text": " the" }, { "id": 13088, - "logprob": -0.003479004, + "logprob": -0.0027103424, "special": false, "text": " chicken" }, { "id": 349, - "logprob": -0.0035095215, + "logprob": -0.003156662, "special": false, "text": " is" }, { "id": 6398, - "logprob": -0.3088379, + "logprob": -0.37304688, "special": false, "text": " sitting" }, { "id": 356, - "logprob": -0.027755737, + "logprob": -0.034576416, "special": false, "text": " on" }, { "id": 264, - "logprob": -0.31884766, + "logprob": -0.29418945, "special": false, "text": " a" }, { "id": 17972, - "logprob": -0.047943115, + "logprob": -0.042877197, "special": false, "text": " pile" }, { "id": 
302, - "logprob": -0.0002925396, + "logprob": -0.00028443336, "special": false, "text": " of" }, { "id": 2445, - "logprob": -0.02935791, + "logprob": -0.023223877, "special": false, "text": " money" }, { "id": 28723, - "logprob": -0.031219482, + "logprob": -0.018157959, "special": false, "text": "." }, { "id": 32002, - "logprob": -0.00034475327, + "logprob": -0.00018393993, "special": true, "text": "" }, diff --git a/router/Cargo.toml b/router/Cargo.toml index 853f46b1..5855ac86 100644 --- a/router/Cargo.toml +++ b/router/Cargo.toml @@ -22,6 +22,7 @@ text-generation-client = { path = "client" } clap = { version = "4.4.5", features = ["derive", "env"] } futures = "0.3.28" hf-hub = { workspace = true } +itertools = "0.10" jsonschema = { version = "0.17.1", features = ["draft202012"] } metrics = "0.21.1" metrics-exporter-prometheus = { version = "0.15.1", features = [] } diff --git a/router/src/config.rs b/router/src/config.rs index 29fefd5b..ccbdd8b2 100644 --- a/router/src/config.rs +++ b/router/src/config.rs @@ -71,10 +71,12 @@ fn get_unpadded_features( let current_aspect_ratio: f64 = current_width as f64 / current_height as f64; let (current_height, current_width) = if aspect_ratio > current_aspect_ratio { let new_height = (height * current_width) / width; - (new_height, current_width) + let padding = (current_height - new_height) / 2; + (current_height - (2 * padding), current_width) } else { let new_width = (width * current_height) / height; - (current_height, new_width) + let padding = (current_width - new_width) / 2; + (current_height, current_width - (2 * padding)) }; let unpadded_features = current_height * current_width; @@ -88,7 +90,9 @@ impl LlavaNext { let patch_size = self.vision_config.patch_size; assert!(image_size % patch_size == 0); let npatches = image_size / patch_size; - let (num_patch_height, num_patch_width) = + // Dimensions are intentionally swapped to be bug-compatible with + // upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59 + 
let (num_patch_width, num_patch_height) = get_anyres_image_grid_shape(height, width, &self.image_grid_pinpoints, image_size); let (unpadded_features, newline_features) = @@ -112,7 +116,7 @@ pub struct Idefics2 {} impl Idefics2 { pub fn get_number_of_features(&self, _height: usize, _width: usize) -> usize { - 320 + 64 } } diff --git a/router/src/lib.rs b/router/src/lib.rs index 126726c6..4ba76f5f 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -70,6 +70,25 @@ impl HubTokenizerConfig { } } +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "processor_class")] +pub enum HubPreprocessorConfig { + Idefics2Processor(Idefics2Preprocessor), +} + +impl HubPreprocessorConfig { + pub fn from_file>(filename: P) -> Option { + let content = std::fs::read_to_string(filename).ok()?; + serde_json::from_str(&content).ok() + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Idefics2Preprocessor { + #[serde(default)] + do_image_splitting: bool, +} + #[derive(Debug, Clone, Deserialize, Default)] pub struct HubProcessorConfig { pub chat_template: Option, diff --git a/router/src/main.rs b/router/src/main.rs index a7caec2e..68b6b1fc 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -13,7 +13,9 @@ use std::io::BufReader; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::path::{Path, PathBuf}; use text_generation_router::config::Config; -use text_generation_router::{server, HubModelInfo, HubProcessorConfig, HubTokenizerConfig}; +use text_generation_router::{ + server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig, +}; use thiserror::Error; use tokenizers::Tokenizer; use tower_http::cors::AllowOrigin; @@ -214,6 +216,7 @@ async fn main() -> Result<(), RouterError> { tokenizer_filename, config_filename, tokenizer_config_filename, + preprocessor_config_filename, processor_config_filename, model_info, ) = match api { @@ -221,6 +224,7 @@ async fn main() -> Result<(), RouterError> { 
Some(local_path.join("tokenizer.json")), Some(local_path.join("config.json")), Some(local_path.join("tokenizer_config.json")), + Some(local_path.join("preprocessor_config.json")), Some(local_path.join("processor_config.json")), None, ), @@ -237,6 +241,7 @@ async fn main() -> Result<(), RouterError> { }; let config_filename = api_repo.get("config.json").await.ok(); let tokenizer_config_filename = api_repo.get("tokenizer_config.json").await.ok(); + let preprocessor_config_filename = api_repo.get("preprocessor_config.json").await.ok(); let processor_config_filename = api_repo.get("processor_config.json").await.ok(); let model_info = if let Some(model_info) = get_model_info(&api_repo).await { @@ -249,6 +254,7 @@ async fn main() -> Result<(), RouterError> { tokenizer_filename, config_filename, tokenizer_config_filename, + preprocessor_config_filename, processor_config_filename, model_info, ) @@ -263,6 +269,7 @@ async fn main() -> Result<(), RouterError> { repo.get("tokenizer.json"), repo.get("config.json"), repo.get("tokenizer_config.json"), + repo.get("preprocessor_config.json"), repo.get("processor_config.json"), None, ) @@ -300,6 +307,8 @@ async fn main() -> Result<(), RouterError> { HubTokenizerConfig::default() }); + let preprocessor_config = + preprocessor_config_filename.and_then(HubPreprocessorConfig::from_file); let processor_config = processor_config_filename .and_then(HubProcessorConfig::from_file) .unwrap_or_default(); @@ -361,6 +370,7 @@ async fn main() -> Result<(), RouterError> { ngrok_authtoken, ngrok_edge, tokenizer_config, + preprocessor_config, processor_config, messages_api_enabled, disable_grammar_support, diff --git a/router/src/server.rs b/router/src/server.rs index 7f15bfdd..0cb08d4e 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -12,9 +12,9 @@ use crate::kserve::{ use crate::validation::ValidationError; use crate::{ BestOfSequence, Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest, - GenerateResponse, 
GrammarType, HubModelInfo, HubProcessorConfig, HubTokenizerConfig, Info, - Message, PrefillToken, SimpleToken, StreamDetails, StreamResponse, Token, TokenizeResponse, - Usage, Validation, + GenerateResponse, GrammarType, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, + HubTokenizerConfig, Info, Message, PrefillToken, SimpleToken, StreamDetails, StreamResponse, + Token, TokenizeResponse, Usage, Validation, }; use crate::{ ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete, @@ -1423,6 +1423,7 @@ pub async fn run( _ngrok_authtoken: Option, _ngrok_edge: Option, tokenizer_config: HubTokenizerConfig, + preprocessor_config: Option, processor_config: HubProcessorConfig, messages_api_enabled: bool, grammar_support: bool, @@ -1636,6 +1637,7 @@ pub async fn run( validation_workers, tokenizer, config, + preprocessor_config, max_best_of, max_stop_sequences, max_top_n_tokens, diff --git a/router/src/validation.rs b/router/src/validation.rs index e2bf5a5d..12cf2ab3 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -1,13 +1,16 @@ /// Payload validation logic use crate::config::Config; use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput}; -use crate::{GenerateParameters, GenerateRequest, GrammarType}; +use crate::{ + GenerateParameters, GenerateRequest, GrammarType, HubPreprocessorConfig, Idefics2Preprocessor, +}; use base64::{engine::general_purpose::STANDARD, Engine}; use image::{io::Reader as ImageReader, ImageFormat}; use jsonschema::{Draft, JSONSchema}; use rand::{thread_rng, Rng}; use serde_json::Value; use std::io::Cursor; +use std::iter; use text_generation_client::{Chunk, Image, InputChunk}; use thiserror::Error; use tokenizers::tokenizer::Tokenizer; @@ -36,6 +39,7 @@ impl Validation { workers: usize, tokenizer: Option, config: Option, + preprocessor_config: Option, max_best_of: usize, max_stop_sequences: usize, max_top_n_tokens: u32, @@ -53,12 +57,18 @@ impl Validation { for _ in 
0..workers { let tokenizer_clone = tokenizer.clone(); let config_clone = config.clone(); + let preprocessor_config_clone = preprocessor_config.clone(); let (tokenizer_sender, tokenizer_receiver) = mpsc::unbounded_channel(); senders.push(tokenizer_sender); // Spawn worker tokio::task::spawn_blocking(move || { - tokenizer_worker(tokenizer_clone, config_clone, tokenizer_receiver) + tokenizer_worker( + tokenizer_clone, + config_clone, + preprocessor_config_clone, + tokenizer_receiver, + ) }); } @@ -422,13 +432,20 @@ async fn round_robin_task( fn tokenizer_worker( tokenizer: Tokenizer, config: Option, + preprocessor_config: Option, mut receiver: mpsc::UnboundedReceiver, ) { // Loop over requests while let Some(((inputs, truncate), response_tx, parent_span)) = receiver.blocking_recv() { parent_span.in_scope(|| { response_tx - .send(prepare_input(inputs, truncate, &tokenizer, &config)) + .send(prepare_input( + inputs, + truncate, + &tokenizer, + config.as_ref(), + preprocessor_config.as_ref(), + )) .unwrap_or(()) }) } @@ -508,16 +525,67 @@ fn fetch_image(input: &str) -> Result<(Vec, String, usize, usize), Validatio } } +fn image_tokens( + config: &Config, + preprocessor_config: Option<&HubPreprocessorConfig>, + height: usize, + width: usize, +) -> String { + use Config::*; + use HubPreprocessorConfig::*; + match config { + Idefics => "".to_string(), + Idefics2(config) => { + const FAKE: &str = ""; + const IMAGE: &str = ""; + + let slots = config.get_number_of_features(height, width); + + let mut image_string = String::with_capacity(2 * FAKE.len() + slots * IMAGE.len()); + image_string.push_str(FAKE); + image_string.extend(iter::repeat(IMAGE).take(slots)); + image_string.push_str(FAKE); + + if matches!( + preprocessor_config, + Some(Idefics2Processor(Idefics2Preprocessor { + do_image_splitting: true, + .. 
+ })) + ) { + image_string = image_string.repeat(5); + }; + + image_string + } + Paligemma(config) => "".repeat(config.get_number_of_features(height, width)), + LlavaNext(config) => "".repeat(config.get_number_of_features(height, width)), + _ => unimplemented!("Images tokens are not supported for this model configuration"), + } +} + +fn image_tokens_fixup(config: &Config, text: String) -> String { + match config { + Config::Idefics2(_) => { + const FAKE: &str = ""; + text.replace(&format!("{FAKE}{FAKE}"), FAKE) + } + _ => text, + } +} + /// Get input length and optionally truncate it fn prepare_input( inputs: String, _truncate: Option, tokenizer: &Tokenizer, - config: &Option, + config: Option<&Config>, + preprocessor_config: Option<&HubPreprocessorConfig>, ) -> Result<(tokenizers::Encoding, Vec), ValidationError> { + use Config::*; static RE: Lazy = Lazy::new(|| Regex::new(r"!\[\]\([^\)]*\)").unwrap()); let (tokenizer_query, input_chunks) = match config { - Some(Config::LlavaNext(config)) => { + Some(config @ (Idefics | Idefics2(_) | Paligemma(_) | LlavaNext(_))) => { let mut input_chunks = Vec::new(); let mut tokenizer_query = String::with_capacity(inputs.len()); let mut start = 0; @@ -529,88 +597,17 @@ fn prepare_input( tokenizer_query.push_str(&inputs[start..chunk_start]); } let (data, mimetype, height, width) = fetch_image(&inputs[chunk_start..chunk_end])?; - let slots = config.get_number_of_features(height, width); input_chunks.push(Chunk::Image(Image { data, mimetype }).into()); - tokenizer_query.push_str(&"".repeat(slots)); + tokenizer_query.push_str(&image_tokens(config, preprocessor_config, height, width)); start = chunk_end; } if start != inputs.len() { input_chunks.push(Chunk::Text(inputs[start..].to_string()).into()); tokenizer_query.push_str(&inputs[start..]); } - (tokenizer_query, input_chunks) - } - Some(Config::Paligemma(config)) => { - let mut input_chunks = Vec::new(); - let mut tokenizer_query = String::with_capacity(inputs.len()); - let mut 
start = 0; - for chunk in RE.find_iter(&inputs) { - let chunk_start = chunk.start(); - let chunk_end = chunk.end(); - if chunk_start != start { - input_chunks.push(Chunk::Text(inputs[start..chunk_start].to_string()).into()); - tokenizer_query.push_str(&inputs[start..chunk_start]); - } - let (data, mimetype, height, width) = fetch_image(&inputs[chunk_start..chunk_end])?; - let slots = config.get_number_of_features(height, width); - input_chunks.push(Chunk::Image(Image { data, mimetype }).into()); - tokenizer_query.push_str(&"".repeat(slots)); - start = chunk_end; - } - if start != inputs.len() { - input_chunks.push(Chunk::Text(inputs[start..].to_string()).into()); - tokenizer_query.push_str(&inputs[start..]); - } - (tokenizer_query, input_chunks) - } - Some(Config::Idefics2(config)) => { - let mut input_chunks = Vec::new(); - let mut tokenizer_query = String::with_capacity(inputs.len()); - let mut start = 0; - for chunk in RE.find_iter(&inputs) { - let chunk_start = chunk.start(); - let chunk_end = chunk.end(); - if chunk_start != start { - input_chunks.push(Chunk::Text(inputs[start..chunk_start].to_string()).into()); - tokenizer_query.push_str(&inputs[start..chunk_start]); - } - let (data, mimetype, height, width) = fetch_image(&inputs[chunk_start..chunk_end])?; - let slots = config.get_number_of_features(height, width); - tokenizer_query.push_str(""); - tokenizer_query.push_str(&"".repeat(slots)); - tokenizer_query.push_str(""); - input_chunks.push(Chunk::Image(Image { data, mimetype }).into()); - start = chunk_end; - } - if start != inputs.len() { - input_chunks.push(Chunk::Text(inputs[start..].to_string()).into()); - tokenizer_query.push_str(&inputs[start..]); - } - (tokenizer_query, input_chunks) - } - Some(Config::Idefics) => { - let mut input_chunks = Vec::new(); - let mut tokenizer_query = String::with_capacity(inputs.len()); - let mut start = 0; - for chunk in RE.find_iter(&inputs) { - let chunk_start = chunk.start(); - let chunk_end = chunk.end(); - if 
chunk_start != start { - input_chunks.push(Chunk::Text(inputs[start..chunk_start].to_string()).into()); - tokenizer_query.push_str(&inputs[start..chunk_start]); - } - let (data, mimetype, _height, _width) = - fetch_image(&inputs[chunk_start..chunk_end])?; - let slots = 1; - tokenizer_query.push_str(&"".repeat(slots)); - input_chunks.push(Chunk::Image(Image { data, mimetype }).into()); - start = chunk_end; - } - if start != inputs.len() { - input_chunks.push(Chunk::Text(inputs[start..].to_string()).into()); - tokenizer_query.push_str(&inputs[start..]); - } + tokenizer_query = image_tokens_fixup(config, tokenizer_query); + (tokenizer_query, input_chunks) } _ => (inputs.clone(), vec![Chunk::Text(inputs).into()]), @@ -750,7 +747,7 @@ pub enum ValidationError { #[cfg(test)] mod tests { use super::*; - use crate::config::{PaliTextConfig, Paligemma}; + use crate::config::{Idefics2, PaliTextConfig, Paligemma}; use crate::default_parameters; use crate::tests::get_tokenizer; @@ -769,6 +766,7 @@ mod tests { workers, tokenizer, config, + None, max_best_of, max_stop_sequence, max_top_n_tokens, @@ -803,6 +801,7 @@ mod tests { workers, tokenizer, config, + None, max_best_of, max_stop_sequence, max_top_n_tokens, @@ -836,6 +835,7 @@ mod tests { workers, tokenizer, config, + None, max_best_of, max_stop_sequence, max_top_n_tokens, @@ -874,6 +874,7 @@ mod tests { workers, tokenizer, config, + None, max_best_of, max_stop_sequence, max_top_n_tokens, @@ -941,6 +942,7 @@ mod tests { workers, tokenizer, config, + None, max_best_of, max_stop_sequences, max_top_n_tokens, @@ -1026,6 +1028,7 @@ mod tests { workers, tokenizer, Some(config), + None, max_best_of, max_stop_sequence, max_top_n_tokens, @@ -1058,4 +1061,83 @@ mod tests { "Failed to process images", ); } + + #[tokio::test] + async fn test_idefics2_correct_n_fake_tokens() { + let pixel_data = STANDARD.decode(PIXEL_GIF).unwrap(); + + let tokenizer = Some(get_tokenizer().await); + + let max_best_of = 2; + let max_stop_sequence = 3; + let 
max_top_n_tokens = 4; + let max_input_length = 5; + let max_total_tokens = 6; + let disable_grammar_support = true; + let workers = 1; + let config = Config::Idefics2(Idefics2 {}); + let validation = Validation::new( + workers, + tokenizer, + Some(config), + Some(HubPreprocessorConfig::Idefics2Processor( + Idefics2Preprocessor { + do_image_splitting: true, + }, + )), + max_best_of, + max_stop_sequence, + max_top_n_tokens, + max_input_length, + max_total_tokens, + disable_grammar_support, + ); + + let (encoding, chunks) = match validation + .tokenize( + format!( + "test![](data:image/gif;base64,{})![](data:image/gif;base64,{})", + PIXEL_GIF, PIXEL_GIF + ), + None, + ) + .await + { + Ok(Some((encoding, chunks))) => (encoding, chunks), + _ => panic!("Unexpected tokenization failure"), + }; + + assert!( + chunks + == vec![ + Chunk::Text("test".to_string()).into(), + Chunk::Image(Image { + data: pixel_data.clone(), + mimetype: "image/gif".to_string() + }) + .into(), + Chunk::Image(Image { + data: pixel_data.clone(), + mimetype: "image/gif".to_string() + }) + .into() + ], + "Failed to process images", + ); + + // Verify the number of fake tokens: + // + // - Two images surrounded/separated by a fake token = 3. + // - Both are split in 5 subimages, separated by a fake token: 2 * 4 + // + // Fake tokens get split up by the testing tokenizer, but we don't care. + assert_eq!( + encoding + .get_tokens() + .iter() + .filter(|t| *t == "fake") + .count(), + 11 + ); + } } diff --git a/server/text_generation_server/models/custom_modeling/llava_next.py b/server/text_generation_server/models/custom_modeling/llava_next.py index 9a670140..6d38442c 100644 --- a/server/text_generation_server/models/custom_modeling/llava_next.py +++ b/server/text_generation_server/models/custom_modeling/llava_next.py @@ -39,7 +39,7 @@ def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): Args: image_size (`tuple`): - The size of the input image in the format (width, height). 
+ The size of the input image in the format (height, width). grid_pinpoints (`List`): A list containing possible resolutions. Each item in the list should be a tuple or list of the form `(height, width)`. @@ -47,7 +47,7 @@ def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): The size of each image patch. Returns: - tuple: The shape of the image patch grid in the format (width, height). + tuple: The shape of the image patch grid in the format (height, width). """ if not isinstance(grid_pinpoints, list): raise ValueError("grid_pinpoints should be a list of tuples or lists") @@ -230,7 +230,10 @@ class LlavaNextForConditionalGeneration(nn.Module): raise ValueError( "The number of patches is not consistent with the image size." ) - num_patch_height, num_patch_width = get_anyres_image_grid_shape( + + # Dimensions are intentionally swapped to be bug-compatible with + # upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59 + num_patch_width, num_patch_height = get_anyres_image_grid_shape( image_sizes[image_idx], self.config.image_grid_pinpoints, self.config.vision_config.image_size, diff --git a/server/text_generation_server/models/pali_gemma.py b/server/text_generation_server/models/pali_gemma.py index e883ce02..a167e467 100644 --- a/server/text_generation_server/models/pali_gemma.py +++ b/server/text_generation_server/models/pali_gemma.py @@ -39,7 +39,9 @@ class PaliGemmaBatch(VlmCausalLMBatch): # TODO do_convert_RGB should be on by default ? 
image = image.convert("RGB") image_input = processor.image_processor(image, return_tensors="pt") - full_text += image_text_replacement(image_input, config, image_id) + full_text += image_text_replacement( + processor, image_input, config, image_id + ) image_inputs.append(image_input) else: raise RuntimeError(f"Invalid chunk type {chunk_type}") diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py index 218d1167..1cdf37ea 100644 --- a/server/text_generation_server/models/vlm_causal_lm.py +++ b/server/text_generation_server/models/vlm_causal_lm.py @@ -1,3 +1,4 @@ +from itertools import repeat import torch from PIL import Image from io import BytesIO @@ -15,6 +16,9 @@ from text_generation_server.models.flash_mistral import ( tracer = trace.get_tracer(__name__) +IDEFICS2_FAKE_TOKEN = "" +IDEFICS2_IMAGE_TOKEN = "" + def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): """ @@ -22,7 +26,7 @@ def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): Args: image_size (`tuple`): - The size of the input image in the format (width, height). + The size of the input image in the format (height, width). grid_pinpoints (`List`): A list containing possible resolutions. Each item in the list should be a tuple or list of the form `(height, width)`. @@ -39,15 +43,13 @@ def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size): return height // patch_size, width // patch_size -def image_text_replacement(image_input, config, image_id) -> str: +def image_text_replacement(processor, image_input, config, image_id: int) -> str: if config.model_type == "idefics2": - # TODO technically depends on image splitting which is not implemented. 
- num_features = 320 - return ( - "" - + "" * num_features - + "" - ) + image_seq_len = 64 + image_str = f"{IDEFICS2_FAKE_TOKEN}{IDEFICS2_IMAGE_TOKEN * image_seq_len}{IDEFICS2_FAKE_TOKEN}" + if processor.image_processor.do_image_splitting: + image_str *= 5 + return image_str elif config.model_type == "llava_next": height, width = image_input["image_sizes"][image_id] num_features = get_number_of_features(height, width, config) @@ -64,20 +66,35 @@ def image_text_replacement(image_input, config, image_id) -> str: raise RuntimeError(f"Unknown config {config.model_type} for multimodal") +def image_text_replacement_fixup(config, text: str) -> str: + if config.model_type == "idefics2": + return text.replace( + f"{IDEFICS2_FAKE_TOKEN}{IDEFICS2_FAKE_TOKEN}", IDEFICS2_FAKE_TOKEN + ) + return text + + def get_unpadded_features( - height: int, width: int, npatches: int, num_patch_height: int, num_patch_width: int + original_height: int, + original_width: int, + npatches: int, + num_patch_height: int, + num_patch_width: int, ) -> Tuple[int, int]: current_height = npatches * num_patch_height current_width = npatches * num_patch_width - aspect_ratio: float = width / height + aspect_ratio: float = original_width / original_height current_aspect_ratio: float = current_width / current_height + if aspect_ratio > current_aspect_ratio: - new_height = (height * current_width) // width - current_height = new_height + new_height = (original_height * current_width) // original_width + padding = (current_height - new_height) // 2 + current_height = current_height - (2 * padding) else: - new_width = (width * current_height) // height - current_width = new_width + new_width = (original_width * current_height) // original_height + padding = (current_width - new_width) // 2 + current_width = current_width - (2 * padding) unpadded_features = current_height * current_width newline_features = current_height @@ -96,7 +113,9 @@ def get_number_of_features(height: int, width: int, config) -> int: 
npatches = image_size // patch_size - num_patch_height, num_patch_width = get_anyres_image_grid_shape( + # Dimensions are intentionally swapped to be bug-compatible with + # upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59 + num_patch_width, num_patch_height = get_anyres_image_grid_shape( [height, width], image_grid_pinpoints, image_size, @@ -168,9 +187,13 @@ class VlmCausalLMBatch(FlashCausalLMBatch): if chunk_type == "text": full_text += chunk.text elif chunk_type == "image": - full_text += image_text_replacement(image_inputs, config, image_id) + full_text += image_text_replacement( + processor, image_inputs, config, image_id + ) image_id += 1 + full_text = image_text_replacement_fixup(config, full_text) + batch_inputs.append(full_text) max_truncation = max(max_truncation, r.truncate) From 0e4ab6d31cbbaa3cc8be2046dfecfc5bde375494 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 27 Jun 2024 16:04:03 +0200 Subject: [PATCH 04/12] Fixing malformed rust tokenizers (#2134) * Fixing malformed rust tokenizers * Fix for deepseek too. 
--- Cargo.lock | 10 ++++------ router/src/config.rs | 1 + router/src/lib.rs | 3 +++ router/src/main.rs | 35 ++++++++++++++++++++++++++++++++--- 4 files changed, 40 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a03da8b2..090e2e80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3762,7 +3762,7 @@ dependencies = [ [[package]] name = "text-generation-benchmark" -version = "2.0.5-dev0" +version = "2.1.1-dev0" dependencies = [ "average", "clap", @@ -3783,7 +3783,7 @@ dependencies = [ [[package]] name = "text-generation-client" -version = "2.0.5-dev0" +version = "2.1.1-dev0" dependencies = [ "async-trait", "base64 0.22.1", @@ -3801,7 +3801,7 @@ dependencies = [ [[package]] name = "text-generation-launcher" -version = "2.0.5-dev0" +version = "2.1.1-dev0" dependencies = [ "clap", "ctrlc", @@ -3820,7 +3820,7 @@ dependencies = [ [[package]] name = "text-generation-router" -version = "2.0.5-dev0" +version = "2.1.1-dev0" dependencies = [ "async-stream", "axum 0.7.5", @@ -3855,8 +3855,6 @@ dependencies = [ "tokio-stream", "tower-http", "tracing", - "tracing-core", - "tracing-log 0.2.0", "tracing-opentelemetry 0.21.0", "tracing-subscriber", "utoipa", diff --git a/router/src/config.rs b/router/src/config.rs index ccbdd8b2..7737165e 100644 --- a/router/src/config.rs +++ b/router/src/config.rs @@ -162,6 +162,7 @@ pub enum Config { Baichuan, Paligemma(Paligemma), Gemma, + Gemma2, Cohere, Drbx, Falcon, diff --git a/router/src/lib.rs b/router/src/lib.rs index 4ba76f5f..a5b97af3 100644 --- a/router/src/lib.rs +++ b/router/src/lib.rs @@ -61,6 +61,9 @@ pub struct HubTokenizerConfig { pub bos_token: Option, #[serde(deserialize_with = "token_serde::deserialize")] pub eos_token: Option, + pub tokenizer_class: Option, + pub add_bos_token: Option, + pub add_eos_token: Option, } impl HubTokenizerConfig { diff --git a/router/src/main.rs b/router/src/main.rs index 68b6b1fc..3aa5a6bf 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -17,7 +17,7 @@ use 
text_generation_router::{ server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig, }; use thiserror::Error; -use tokenizers::Tokenizer; +use tokenizers::{processors::template::TemplateProcessing, Tokenizer}; use tower_http::cors::AllowOrigin; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -275,8 +275,6 @@ async fn main() -> Result<(), RouterError> { ) } }; - let tokenizer: Option = - tokenizer_filename.and_then(|filename| Tokenizer::from_file(filename).ok()); let config: Option = config_filename.and_then(|filename| { std::fs::read_to_string(filename) .ok() @@ -306,6 +304,37 @@ async fn main() -> Result<(), RouterError> { tracing::warn!("Could not find tokenizer config locally and no API specified"); HubTokenizerConfig::default() }); + let tokenizer: Option = + tokenizer_filename.and_then(|filename| { + let mut tokenizer = Tokenizer::from_file(filename).ok(); + if let Some(tokenizer) = &mut tokenizer{ + if let Some(class) = &tokenizer_config.tokenizer_class{ + if class == "LlamaTokenizer" || class == "LlamaTokenizerFast" { + tracing::info!("Overriding LllamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); + let mut single = vec![]; + let mut special_tokens = vec![]; + if let Some(true) = &tokenizer_config.add_bos_token{ + if let Some(bos_token) = &tokenizer_config.bos_token{ + let bos_token_id = tokenizer.token_to_id(&bos_token).expect("Should have found the bos token id"); + special_tokens.push((bos_token.clone(), bos_token_id)); + single.push(bos_token.to_string()); + } + } + single.push("$0".to_string()); + if let Some(true) = &tokenizer_config.add_eos_token{ + if let Some(eos_token) = &tokenizer_config.eos_token{ + let eos_token_id = tokenizer.token_to_id(&eos_token).expect("Should have found the eos token 
id"); + special_tokens.push((eos_token.clone(), eos_token_id)); + single.push(eos_token.to_string()); + } + } + let post_processor = TemplateProcessing::builder().try_single(single).unwrap().special_tokens(special_tokens).build().unwrap(); + tokenizer.with_post_processor(post_processor); + }} + } + tokenizer + + }); let preprocessor_config = preprocessor_config_filename.and_then(HubPreprocessorConfig::from_file); From 3ea8259af1c7b7efa4fdfe942a27afb1f0dbe2c1 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Thu, 27 Jun 2024 16:04:20 +0200 Subject: [PATCH 05/12] Fixing gemma2. (#2135) * Fixing gemma2. * Adding new model. --- docs/source/supported_models.md | 1 + .../text_generation_server/models/__init__.py | 30 ++ .../custom_modeling/flash_gemma2_modeling.py | 500 ++++++++++++++++++ .../custom_modeling/flash_gemma_modeling.py | 2 - .../models/flash_causal_lm.py | 18 +- .../models/flash_gemma2.py | 75 +++ .../text_generation_server/models/globals.py | 5 + 7 files changed, 622 insertions(+), 9 deletions(-) create mode 100644 server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py create mode 100644 server/text_generation_server/models/flash_gemma2.py diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md index 3468e988..1eeed39f 100644 --- a/docs/source/supported_models.md +++ b/docs/source/supported_models.md @@ -10,6 +10,7 @@ Text Generation Inference enables serving optimized models on specific hardware - [Llama](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) - [Phi 3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) - [Gemma](https://huggingface.co/google/gemma-7b) +- [Gemma2](https://huggingface.co/google/gemma2-9b) - [Cohere](https://huggingface.co/CohereForAI/c4ai-command-r-plus) - [Dbrx](https://huggingface.co/databricks/dbrx-instruct) - [Mamba](https://huggingface.co/state-spaces/mamba-2.8b-slimpj) diff --git a/server/text_generation_server/models/__init__.py 
b/server/text_generation_server/models/__init__.py index 648fcee9..f2f0f457 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -68,6 +68,9 @@ try: from text_generation_server.models.flash_gemma import ( FlashGemma, ) + from text_generation_server.models.flash_gemma2 import ( + FlashGemma2, + ) from text_generation_server.models.pali_gemma import ( PaliGemma, ) @@ -102,6 +105,7 @@ if FLASH_ATTENTION: __all__.append(FlashQwen2) __all__.append(FlashStarcoder2) __all__.append(FlashGemma) + __all__.append(FlashGemma2) __all__.append(FlashCohere) MAMBA_AVAILABLE = True @@ -143,6 +147,11 @@ class ModelType(enum.Enum): "name": "Gemma", "url": "https://huggingface.co/google/gemma-7b", } + GEMMA2 = { + "type": "gemma2", + "name": "Gemma2", + "url": "https://huggingface.co/google/gemma2-9b", + } COHERE = { "type": "cohere", "name": "Cohere", @@ -630,6 +639,27 @@ def get_model( dtype=dtype, trust_remote_code=trust_remote_code, ) + elif model_type == GEMMA2: + if FLASH_ATTENTION: + return FlashGemma2( + model_id, + revision, + quantize=quantize, + speculator=speculator, + dtype=dtype, + trust_remote_code=trust_remote_code, + ) + elif sharded: + raise NotImplementedError(FLASH_ATT_ERROR_MESSAGE.format("Sharded Gemma2")) + else: + return CausalLM( + model_id, + revision, + quantize=quantize, + speculator=speculator, + dtype=dtype, + trust_remote_code=trust_remote_code, + ) if model_type == COHERE: if FLASH_ATTENTION: diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py new file mode 100644 index 00000000..a71de61f --- /dev/null +++ b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py @@ -0,0 +1,500 @@ +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. 
+# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torch.distributed + +from torch import nn +from transformers.activations import ACT2FN +from transformers.configuration_utils import PretrainedConfig +from typing import Optional, List, Tuple + +from text_generation_server.layers.attention import ( + paged_attention, + attention, + reshape_and_cache, +) +from text_generation_server.layers import ( + TensorParallelRowLinear, + TensorParallelColumnLinear, + TensorParallelEmbedding, + SpeculativeHead, + get_linear, +) +from text_generation_server.layers.rotary import PositionRotaryEmbedding +from text_generation_server.layers.layernorm import ( + FastRMSNorm, +) + + +class Gemma2Config(PretrainedConfig): + def __init__( + self, + vocab_size=256128, + hidden_size=3072, + intermediate_size=24576, + num_hidden_layers=28, + num_attention_heads=16, + num_key_value_heads=16, + head_dim=256, + hidden_act="gelu_pytorch_tanh", + max_position_embeddings=8192, + initializer_range=0.02, + rms_norm_eps=1e-6, + use_cache=True, + pad_token_id=None, + bos_token_id=1, + eos_token_id=2, + tie_word_embeddings=True, + rope_theta=10000.0, + rope_scaling=None, + attention_bias=False, 
+ attention_dropout=0.0, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.head_dim = head_dim + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + + # for backward compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.use_cache = use_cache + self.rope_theta = rope_theta + self.rope_scaling = rope_scaling + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) + + +class Gemma2FastRMSNorm(FastRMSNorm): + @classmethod + def load(cls, prefix, weights, eps=1e-6): + dtype = weights.dtype + weights.dtype = torch.float32 + weight = weights.get_tensor(f"{prefix}.weight") + 1 + weights.dtype = dtype + new = cls(weight, eps) + new.dtype = dtype + return new + + # perform the multiplication in full precision and downcast after + def forward(self, hidden_states, residual=None): + if residual is not None: + hidden_states += residual + residual = hidden_states + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + hidden_states = hidden_states * self.weight + return hidden_states.to(self.dtype), residual + + +def load_attention(config, prefix, weights): + if config.num_attention_heads != config.num_key_value_heads: + return _load_gqa(config, prefix, weights) + else: + return TensorParallelColumnLinear.load_multi( + config, + prefixes=[f"{prefix}.q_proj", 
f"{prefix}.k_proj", f"{prefix}.v_proj"], + dim=0, + weights=weights, + bias=False, + ) + + +def _load_gqa(config, prefix: str, weights): + assert config.num_attention_heads % weights.process_group.size() == 0 + + weight = weights.get_multi_weights_col( + prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"], + quantize=config.quantize, + dim=0, + ) + + if config.quantize not in ["gptq", "awq", "marlin"]: + weight = weight.to(dtype=weights.dtype).to(device=weights.device) + + head_size = config.head_dim + num_heads = config.num_attention_heads // weights.process_group.size() + num_key_value_heads = config.num_key_value_heads // weights.process_group.size() + assert list(weight.shape) == [ + (num_heads + 2 * num_key_value_heads) * head_size, + config.hidden_size, + ], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}" + + return TensorParallelColumnLinear( + get_linear(weight, bias=None, quantize=config.quantize) + ) + + +class FlashGemma2Attention(torch.nn.Module): + def __init__(self, prefix: str, config, weights, causal: bool, is_sliding: bool): + super().__init__() + self.num_heads = config.num_attention_heads + self.head_size = config.head_dim + self.causal = causal + if is_sliding: + self.window_size = config.sliding_window + else: + self.window_size = -1 + + self.rotary_emb = PositionRotaryEmbedding.static( + config=config, + dim=self.head_size, + base=config.rope_theta, + device=weights.device, + ) + + # self.softmax_scale = self.head_size**-0.5 + self.softmax_scale = config.query_pre_attn_scalar**-0.5 + + if self.num_heads % weights.process_group.size() != 0: + raise ValueError( + f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} " + f"and `num_shards`: {weights.process_group.size()}" + ) + self.num_heads = self.num_heads // weights.process_group.size() + self.num_key_value_heads = ( + config.num_key_value_heads // weights.process_group.size() + ) + + 
self.query_key_value = load_attention(config, prefix, weights) + + self.o_proj = TensorParallelRowLinear.load( + config, + prefix=f"{prefix}.o_proj", + weights=weights, + bias=False, + ) + self.num_groups = self.num_heads // self.num_key_value_heads + self.kv_head_mapping = torch.arange( + 0, self.num_key_value_heads, dtype=torch.int32, device=weights.device + ).repeat_interleave(self.num_groups) + + def forward( + self, + hidden_states, + cos, + sin, + cu_seqlen_prefill, + kv_cache, + block_tables, + slots, + input_lengths, + max_s, + ): + qkv = self.query_key_value(hidden_states) + query, kv = qkv.split( + [ + self.head_size * self.num_heads, + 2 * self.head_size * self.num_key_value_heads, + ], + dim=1, + ) + query = query.view(-1, self.num_heads, self.head_size) + kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size) + + self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin) + + reshape_and_cache(kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots) + + # output tensor + attn_output = torch.empty_like(query) + + # Prefill + if cu_seqlen_prefill is not None: + # flash attention + attention( + query, + torch.select(kv, dim=1, index=0), + torch.select(kv, dim=1, index=1), + attn_output, + cu_seqlen_prefill, + max_s, + self.softmax_scale, + causal=self.causal, + window_size_left=self.window_size, + ) + # Decode + else: + paged_attention( + attn_output, + query, + kv_cache[0], + kv_cache[1], + self.kv_head_mapping, + self.softmax_scale, + block_tables, + input_lengths, + max_s, + ) + + return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size)) + + +class Gemma2MLP(nn.Module): + def __init__(self, prefix, config, weights): + super().__init__() + act = config.hidden_act + self.act = ( + ACT2FN[act] + if "gelu" not in act + else lambda x: torch.nn.functional.gelu( + x, + approximate=( + "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none" + ), + ) + ) + # Fuse gate and up proj + self.gate_up_proj = 
TensorParallelColumnLinear.load_multi( + config, + prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"], + weights=weights, + dim=0, + bias=False, + ) + self.down_proj = TensorParallelRowLinear.load( + config, + prefix=f"{prefix}.down_proj", + weights=weights, + bias=False, + ) + self.intermediate_size = ( + config.intermediate_size // weights.process_group.size() + ) + + def forward(self, hidden_states): + gate_up_states = self.gate_up_proj(hidden_states) + gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size) + return self.down_proj(self.act(gate_up_states[:, 0]) * gate_up_states[:, 1]) + + +class FlashGemma2Layer(nn.Module): + def __init__(self, prefix, config, weights, causal: bool, is_sliding: bool): + super().__init__() + self.self_attn = FlashGemma2Attention( + prefix=f"{prefix}.self_attn", + config=config, + weights=weights, + causal=causal, + is_sliding=is_sliding, + ) + self.mlp = Gemma2MLP(prefix=f"{prefix}.mlp", config=config, weights=weights) + + self.input_layernorm = Gemma2FastRMSNorm.load( + prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps + ) + self.post_attention_layernorm = Gemma2FastRMSNorm.load( + prefix=f"{prefix}.post_attention_layernorm", + weights=weights, + eps=config.rms_norm_eps, + ) + self.pre_feedforward_layernorm = Gemma2FastRMSNorm.load( + prefix=f"{prefix}.pre_feedforward_layernorm", + weights=weights, + eps=config.rms_norm_eps, + ) + self.post_feedforward_layernorm = Gemma2FastRMSNorm.load( + prefix=f"{prefix}.post_feedforward_layernorm", + weights=weights, + eps=config.rms_norm_eps, + ) + + def forward( + self, + hidden_states, + residual, + cos, + sin, + cu_seqlen_prefill, + kv_cache, + block_tables, + slots, + input_lengths, + max_s, + ): + normed_hidden_states, res = self.input_layernorm(hidden_states, residual) + + # Self Attention + attn_output = self.self_attn( + normed_hidden_states, + cos, + sin, + cu_seqlen_prefill, + kv_cache, + block_tables, + slots, + input_lengths, + max_s, + 
) + + # faster post attention rms norm + normed_attn_res_output, _ = self.post_attention_layernorm(attn_output) + normed_attn_res_output = normed_attn_res_output + res + res = normed_attn_res_output + + pre_normed, _ = self.pre_feedforward_layernorm(normed_attn_res_output) + mlp_output = self.mlp(pre_normed) + post_hidden_states, _ = self.post_feedforward_layernorm(mlp_output) + + return post_hidden_states, normed_attn_res_output + + +class FlashGemma2Model(torch.nn.Module): + def __init__(self, prefix, config, weights, causal: bool): + super().__init__() + + process_group = weights.process_group + self.tp_rank = process_group.rank() + self.tp_world_size = process_group.size() + self.layers = nn.ModuleList( + [ + FlashGemma2Layer( + prefix=f"{prefix}.layers.{layer_id}", + config=config, + weights=weights, + causal=causal, + is_sliding=layer_id % 2 == 0, + ) + for layer_id in range(config.num_hidden_layers) + ] + ) + self.norm = Gemma2FastRMSNorm.load( + prefix=f"{prefix}.norm", weights=weights, eps=config.rms_norm_eps + ) + + self.head_size = self.layers[0].self_attn.head_size + self.num_heads = self.layers[0].self_attn.num_heads + self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads + + def forward( + self, + inputs_embeds: torch.Tensor, + position_ids: torch.Tensor, + cu_seqlen_prefill: Optional[torch.Tensor], + kv_cache: List[Tuple[torch.Tensor, torch.Tensor]], + block_tables: torch.Tensor, + slots: torch.Tensor, + input_lengths: torch.Tensor, + max_s: int, + ) -> torch.Tensor: + hidden_states = inputs_embeds + + # Get rotary cos and sin for this forward + # Avoid to index in each layer + cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin( + position_ids, max_s, hidden_states.dtype + ) + + residual = None + for i, layer in enumerate(self.layers): + hidden_states, residual = layer( + hidden_states, + residual, + cos, + sin, + cu_seqlen_prefill, + kv_cache[i], + block_tables, + slots, + input_lengths, + max_s, + ) + + hidden_states, _ = 
self.norm(hidden_states, residual) + + return hidden_states + + +class FlashGemma2ForCausalLM(torch.nn.Module): + def __init__(self, prefix, config, weights, causal: bool): + super().__init__() + + embed_norm = config.hidden_size**0.5 + if not prefix: + prefix = "model" + else: + prefix = f"{prefix}.model" + + self.embed_tokens = TensorParallelEmbedding( + prefix=f"{prefix}.embed_tokens", weights=weights + ) + self.embed_tokens.weight *= embed_norm + + self.model = FlashGemma2Model( + prefix=prefix, config=config, weights=weights, causal=causal + ) + self.lm_head = SpeculativeHead.load( + prefix=( + f"{prefix}.embed_tokens" + if config.tie_word_embeddings + else f"{prefix}.lm_head" + ), + config=config, + weights=weights, + ) + + def forward( + self, + input_ids: torch.Tensor, + position_ids: torch.Tensor, + cu_seqlen_prefill: Optional[torch.Tensor], + kv_cache: List[Tuple[torch.Tensor, torch.Tensor]], + block_tables: torch.Tensor, + slots: torch.Tensor, + input_lengths: torch.Tensor, + max_s: int, + prefill_cache_indices: Optional[torch.Tensor], + lm_head_indices: Optional[torch.Tensor] = None, + adapter_data: Optional[torch.Tensor] = None, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + input_embeds = self.embed_tokens(input_ids) + hidden_states = self.model( + input_embeds, + position_ids, + cu_seqlen_prefill, + kv_cache, + block_tables, + slots, + input_lengths, + max_s, + ) + if lm_head_indices is not None: + hidden_states = hidden_states[lm_head_indices] + logits, speculative_logits = self.lm_head(hidden_states) + return logits, speculative_logits diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py index a4fd4740..82891823 100644 --- a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py @@ -375,8 +375,6 @@ class 
FlashGemmaModel(torch.nn.Module): prefix=f"{prefix}.norm", weights=weights, eps=config.rms_norm_eps ) - self.gradient_checkpointing = False - self.head_size = self.layers[0].self_attn.head_size self.num_heads = self.layers[0].self_attn.num_heads self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py index f7678762..a0a78b33 100644 --- a/server/text_generation_server/models/flash_causal_lm.py +++ b/server/text_generation_server/models/flash_causal_lm.py @@ -28,8 +28,12 @@ from text_generation_server.models.types import ( GeneratedText, ) from text_generation_server.pb import generate_pb2 -from text_generation_server.models.globals import MEM_POOL, CUDA_GRAPHS -import text_generation_server.models.globals as tgi_globals +from text_generation_server.models.globals import ( + MEM_POOL, + CUDA_GRAPHS, + get_adapter_to_index, + MODEL_ID, +) from text_generation_server.utils import StoppingCriteria, HeterogeneousNextTokenChooser from text_generation_server.utils.dist import MEMORY_FRACTION from text_generation_server.utils.segments import SegmentConcatBuilder, find_segments @@ -233,7 +237,8 @@ class FlashCausalLMBatch(Batch): stopping_criterias.append(stopping_criteria) top_n_tokens.append(r.top_n_tokens) - adapter_index = tgi_globals.ADAPTER_TO_INDEX.get(r.adapter_id, 0) + ADAPTER_TO_INDEX = get_adapter_to_index() + adapter_index = ADAPTER_TO_INDEX.get(r.adapter_id, 0) adapter_indices_list.append(torch.full((input_length,), adapter_index)) adapter_set.add(adapter_index) @@ -499,9 +504,8 @@ class FlashCausalLMBatch(Batch): top_n_tokens.append(self.top_n_tokens[idx]) - adapter_index = tgi_globals.ADAPTER_TO_INDEX.get( - self.requests[idx].adapter_id, 0 - ) + ADAPTER_TO_INDEX = get_adapter_to_index() + adapter_index = ADAPTER_TO_INDEX.get(self.requests[idx].adapter_id, 0) adapter_set.add(adapter_index) remaining_tokens = ( @@ 
-1017,7 +1021,7 @@ class FlashCausalLM(Model): tunableop_filepath = os.path.join( HUGGINGFACE_HUB_CACHE, - f"tunableop_{tgi_globals.MODEL_ID.replace('/', '-')}_tp{self.world_size}_rank{self.rank}.csv", + f"tunableop_{MODEL_ID.replace('/', '-')}_tp{self.world_size}_rank{self.rank}.csv", ) logger.info( diff --git a/server/text_generation_server/models/flash_gemma2.py b/server/text_generation_server/models/flash_gemma2.py new file mode 100644 index 00000000..9608113b --- /dev/null +++ b/server/text_generation_server/models/flash_gemma2.py @@ -0,0 +1,75 @@ +import torch +import torch.distributed + +from opentelemetry import trace +from typing import Optional +from transformers import PretrainedConfig, AutoTokenizer + +from text_generation_server.models import FlashCausalLM +from text_generation_server.models.custom_modeling.flash_gemma2_modeling import ( + FlashGemma2ForCausalLM, +) +from text_generation_server.utils import ( + initialize_torch_distributed, + weight_files, + Weights, +) + +tracer = trace.get_tracer(__name__) + + +class FlashGemma2(FlashCausalLM): + def __init__( + self, + model_id: str, + revision: Optional[str] = None, + quantize: Optional[str] = None, + speculator: Optional[str] = None, + dtype: Optional[torch.dtype] = None, + trust_remote_code: bool = False, + ): + self.process_group, rank, world_size = initialize_torch_distributed() + if torch.cuda.is_available(): + device = torch.device(f"cuda:{rank}") + dtype = torch.bfloat16 if dtype is None else dtype + else: + raise NotImplementedError("FlashGemma2 is only available on GPU") + + tokenizer = AutoTokenizer.from_pretrained( + model_id, + revision=revision, + padding_side="left", + truncation_side="left", + trust_remote_code=trust_remote_code, + ) + + config = PretrainedConfig.from_pretrained( + model_id, revision=revision, trust_remote_code=trust_remote_code + ) + config.quantize = quantize + config.speculator = speculator + + torch.distributed.barrier(group=self.process_group) + + filenames = 
weight_files(model_id, revision=revision, extension=".safetensors") + weights = Weights(filenames, device, dtype, process_group=self.process_group) + if config.quantize in ["gptq", "awq", "marlin"]: + weights._set_gptq_params(model_id, revision) + + # TODO hardcoded + prefix = "" + model = FlashGemma2ForCausalLM(prefix, config, weights, causal=True) + + torch.distributed.barrier(group=self.process_group) + super(FlashGemma2, self).__init__( + model_id=model_id, + model=model, + tokenizer=tokenizer, + num_layers=len(model.model.layers), + num_kv_heads=model.model.num_key_value_heads, + head_size=model.model.head_size, + dtype=dtype, + device=device, + rank=rank, + world_size=world_size, + ) diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index cc2f172a..bde86e6e 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -44,3 +44,8 @@ ADAPTER_TO_INDEX: Dict[str, int] = None def set_adapter_to_index(adapter_to_index: Dict[str, int]): global ADAPTER_TO_INDEX ADAPTER_TO_INDEX = adapter_to_index + + +def get_adapter_to_index(): + global ADAPTER_TO_INDEX + return ADAPTER_TO_INDEX From 74b0231b19de567769941c4bf18fe388434d0b3d Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 27 Jun 2024 17:16:19 -0400 Subject: [PATCH 06/12] fix: refactor post_processor logic and add test (#2137) * fix: refactor post_processor logic and add test * fix: remove dev comment * fix: adjust when post_processor is overridden and improve create_post_processor --- router/src/main.rs | 148 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 118 insertions(+), 30 deletions(-) diff --git a/router/src/main.rs b/router/src/main.rs index 3aa5a6bf..1e8093d8 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -304,37 +304,21 @@ async fn main() -> Result<(), RouterError> { tracing::warn!("Could not find tokenizer config locally and no API specified"); 
HubTokenizerConfig::default() }); - let tokenizer: Option = - tokenizer_filename.and_then(|filename| { - let mut tokenizer = Tokenizer::from_file(filename).ok(); - if let Some(tokenizer) = &mut tokenizer{ - if let Some(class) = &tokenizer_config.tokenizer_class{ - if class == "LlamaTokenizer" || class == "LlamaTokenizerFast" { - tracing::info!("Overriding LllamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); - let mut single = vec![]; - let mut special_tokens = vec![]; - if let Some(true) = &tokenizer_config.add_bos_token{ - if let Some(bos_token) = &tokenizer_config.bos_token{ - let bos_token_id = tokenizer.token_to_id(&bos_token).expect("Should have found the bos token id"); - special_tokens.push((bos_token.clone(), bos_token_id)); - single.push(bos_token.to_string()); - } - } - single.push("$0".to_string()); - if let Some(true) = &tokenizer_config.add_eos_token{ - if let Some(eos_token) = &tokenizer_config.eos_token{ - let eos_token_id = tokenizer.token_to_id(&eos_token).expect("Should have found the eos token id"); - special_tokens.push((eos_token.clone(), eos_token_id)); - single.push(eos_token.to_string()); - } - } - let post_processor = TemplateProcessing::builder().try_single(single).unwrap().special_tokens(special_tokens).build().unwrap(); - tokenizer.with_post_processor(post_processor); - }} - } - tokenizer - }); + let tokenizer: Option = tokenizer_filename.and_then(|filename| { + let mut tokenizer = Tokenizer::from_file(filename).ok(); + if let Some(tokenizer) = &mut tokenizer { + if let Some(class) = &tokenizer_config.tokenizer_class { + if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast") && tokenizer.get_post_processor().is_none() { + if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) { + tracing::info!("Overriding LlamaTokenizer 
with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); + tokenizer.with_post_processor(post_processor); + } + } + } + } + tokenizer + }); let preprocessor_config = preprocessor_config_filename.and_then(HubPreprocessorConfig::from_file); @@ -543,6 +527,77 @@ pub async fn get_tokenizer_config(api_repo: &ApiRepo) -> Option Result { + let add_bos_token = tokenizer_config.add_bos_token.unwrap_or(true); + let add_eos_token = tokenizer_config.add_eos_token.unwrap_or(false); + + let bos_token = tokenizer_config.bos_token.as_ref(); + let eos_token = tokenizer_config.eos_token.as_ref(); + + if add_bos_token && bos_token.is_none() { + panic!("add_bos_token = true but bos_token is None"); + } + + if add_eos_token && eos_token.is_none() { + panic!("add_eos_token = true but eos_token is None"); + } + + let mut single = Vec::new(); + let mut pair = Vec::new(); + let mut special_tokens = Vec::new(); + + if add_bos_token { + if let Some(bos) = bos_token { + let bos_token_id = tokenizer + .token_to_id(bos) + .expect("Should have found the bos token id"); + special_tokens.push((bos.clone(), bos_token_id)); + single.push(format!("{}:0", bos)); + pair.push(format!("{}:0", bos)); + } + } + + single.push("$A:0".to_string()); + pair.push("$A:0".to_string()); + + if add_eos_token { + if let Some(eos) = eos_token { + let eos_token_id = tokenizer + .token_to_id(eos) + .expect("Should have found the eos token id"); + special_tokens.push((eos.clone(), eos_token_id)); + single.push(format!("{}:0", eos)); + pair.push(format!("{}:0", eos)); + } + } + + if add_bos_token { + if let Some(bos) = bos_token { + single.push(format!("{}:1", bos)); + } + } + + pair.push("$B:1".to_string()); + + if add_eos_token { + if let Some(eos) = eos_token { + pair.push(format!("{}:1", eos)); + } + } + + let post_processor = TemplateProcessing::builder() 
+ .try_single(single)? + .try_pair(pair)? + .special_tokens(special_tokens) + .build()?; + + Ok(post_processor) +} + #[derive(Debug, Error)] enum RouterError { #[error("Argument validation error: {0}")] @@ -552,3 +607,36 @@ enum RouterError { #[error("Tokio runtime failed to start: {0}")] Tokio(#[from] std::io::Error), } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_create_post_processor() { + let tokenizer_config = HubTokenizerConfig { + add_bos_token: None, + add_eos_token: None, + bos_token: Some("".to_string()), + eos_token: Some("".to_string()), + chat_template: None, + tokenizer_class: None, + completion_template: None, + }; + + let tokenizer = + Tokenizer::from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0", None).unwrap(); + let post_processor = create_post_processor(&tokenizer, &tokenizer_config).unwrap(); + + let expected = TemplateProcessing::builder() + .try_single(":0 $A:0 :1") + .unwrap() + .try_pair(":0 $A:0 $B:1") + .unwrap() + .special_tokens(vec![("".to_string(), 1)]) + .build() + .unwrap(); + + assert_eq!(post_processor, expected); + } +} From fb98ab273fa5ed7f43f309de1d8f9ebb41e620de Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 28 Jun 2024 09:31:09 +0200 Subject: [PATCH 07/12] Fixing the CI to also run in release when it's a tag ? 
(#2138) --- .github/workflows/build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 80f258fa..3de270ea 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -157,7 +157,7 @@ jobs: runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"] if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' env: - PYTEST_FLAGS: ${{ github.ref == 'refs/heads/main' && '--release' || '' }} + PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') && '--release' || '' }} steps: - name: Checkout repository uses: actions/checkout@v4 From 6ea570ddfe4a900e98ed60802816ca8373a2d47f Mon Sep 17 00:00:00 2001 From: "Wang, Yi" Date: Mon, 1 Jul 2024 17:27:53 +0800 Subject: [PATCH 08/12] =?UTF-8?q?fix=20microsoft/Phi-3-mini-4k-instruct=20?= =?UTF-8?q?crash=20in=20batch.slots[batch.slot=5F=E2=80=A6=20(#2148)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices] Signed-off-by: Wang, Yi A * Apply suggestions from code review --------- Signed-off-by: Wang, Yi A Co-authored-by: Nicolas Patry --- router/src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/router/src/main.rs b/router/src/main.rs index 1e8093d8..a8651b67 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -17,7 +17,7 @@ use text_generation_router::{ server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig, }; use thiserror::Error; -use tokenizers::{processors::template::TemplateProcessing, Tokenizer}; +use tokenizers::{processors::template::TemplateProcessing, Tokenizer, PostProcessor}; use tower_http::cors::AllowOrigin; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -309,7 +309,7 @@ async fn main() -> Result<(), RouterError> { 
let mut tokenizer = Tokenizer::from_file(filename).ok(); if let Some(tokenizer) = &mut tokenizer { if let Some(class) = &tokenizer_config.tokenizer_class { - if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast") && tokenizer.get_post_processor().is_none() { + if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast"){ if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) { tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); tokenizer.with_post_processor(post_processor); @@ -577,7 +577,7 @@ pub fn create_post_processor( if add_bos_token { if let Some(bos) = bos_token { - single.push(format!("{}:1", bos)); + pair.push(format!("{}:1", bos)); } } From b4552f9de93e83ebdd5cf5c26e0c0daaf4a76080 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 1 Jul 2024 12:02:19 +0200 Subject: [PATCH 09/12] Fixing clippy. 
(#2149) --- router/src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/src/main.rs b/router/src/main.rs index a8651b67..8a5cf459 100644 --- a/router/src/main.rs +++ b/router/src/main.rs @@ -17,7 +17,7 @@ use text_generation_router::{ server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig, }; use thiserror::Error; -use tokenizers::{processors::template::TemplateProcessing, Tokenizer, PostProcessor}; +use tokenizers::{processors::template::TemplateProcessing, Tokenizer}; use tower_http::cors::AllowOrigin; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::util::SubscriberInitExt; @@ -309,7 +309,7 @@ async fn main() -> Result<(), RouterError> { let mut tokenizer = Tokenizer::from_file(filename).ok(); if let Some(tokenizer) = &mut tokenizer { if let Some(class) = &tokenizer_config.tokenizer_class { - if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast"){ + if class == "LlamaTokenizer" || class == "LlamaTokenizerFast"{ if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) { tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); tokenizer.with_post_processor(post_processor); From 25f57e2e98c6a27e39f8c8e79eeff6c21b8e6f5c Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 1 Jul 2024 06:58:40 -0400 Subject: [PATCH 10/12] fix: use weights from base_layer (#2141) --- .../models/custom_modeling/flash_llama_modeling.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index c48ed268..6b82aeca 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ 
b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -309,7 +309,9 @@ class LlamaMLP(nn.Module): dtype=hidden_states.dtype, device="cuda", ) - _custom_C.LLMM_Silu(self.gate_up_proj.linear.weight, hidden_states, out, 8) + _custom_C.LLMM_Silu( + self.gate_up_proj.base_layer.linear.weight, hidden_states, out, 8 + ) return self.down_proj(out, adapter_data) else: gate_up_states = self.gate_up_proj(hidden_states, adapter_data) From 0d97a93c1e14d497e911f42db2da0b9eb032fe75 Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 1 Jul 2024 06:58:49 -0400 Subject: [PATCH 11/12] feat: download lora adapter weights from launcher (#2140) --- launcher/src/main.rs | 46 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 816fa5f3..d2ca38e5 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -898,13 +898,20 @@ enum LauncherError { WebserverCannotStart, } -fn download_convert_model(args: &Args, running: Arc) -> Result<(), LauncherError> { +fn download_convert_model( + model_id: &str, + revision: Option<&str>, + trust_remote_code: bool, + huggingface_hub_cache: Option<&str>, + weights_cache_override: Option<&str>, + running: Arc, +) -> Result<(), LauncherError> { // Enter download tracing span let _span = tracing::span!(tracing::Level::INFO, "download").entered(); let mut download_args = vec![ "download-weights".to_string(), - args.model_id.to_string(), + model_id.to_string(), "--extension".to_string(), ".safetensors".to_string(), "--logger-level".to_string(), @@ -913,13 +920,13 @@ fn download_convert_model(args: &Args, running: Arc) -> Result<(), L ]; // Model optional revision - if let Some(revision) = &args.revision { + if let Some(revision) = &revision { download_args.push("--revision".to_string()); download_args.push(revision.to_string()) } // Trust remote code for automatic peft fusion - if args.trust_remote_code { + if trust_remote_code { 
download_args.push("--trust-remote-code".to_string()); } @@ -934,7 +941,7 @@ fn download_convert_model(args: &Args, running: Arc) -> Result<(), L // If huggingface_hub_cache is set, pass it to the download process // Useful when running inside a docker container - if let Some(ref huggingface_hub_cache) = args.huggingface_hub_cache { + if let Some(ref huggingface_hub_cache) = huggingface_hub_cache { envs.push(("HUGGINGFACE_HUB_CACHE".into(), huggingface_hub_cache.into())); }; @@ -952,7 +959,7 @@ fn download_convert_model(args: &Args, running: Arc) -> Result<(), L // If args.weights_cache_override is some, pass it to the download process // Useful when running inside a HuggingFace Inference Endpoint - if let Some(weights_cache_override) = &args.weights_cache_override { + if let Some(weights_cache_override) = &weights_cache_override { envs.push(( "WEIGHTS_CACHE_OVERRIDE".into(), weights_cache_override.into(), @@ -960,7 +967,7 @@ fn download_convert_model(args: &Args, running: Arc) -> Result<(), L }; // Start process - tracing::info!("Starting download process."); + tracing::info!("Starting check and download process for {model_id}"); let mut download_process = match Command::new("text-generation-server") .args(download_args) .env_clear() @@ -1002,7 +1009,7 @@ fn download_convert_model(args: &Args, running: Arc) -> Result<(), L loop { if let Some(status) = download_process.try_wait().unwrap() { if status.success() { - tracing::info!("Successfully downloaded weights."); + tracing::info!("Successfully downloaded weights for {model_id}"); break; } @@ -1557,7 +1564,28 @@ fn main() -> Result<(), LauncherError> { .expect("Error setting Ctrl-C handler"); // Download and convert model weights - download_convert_model(&args, running.clone())?; + download_convert_model( + &args.model_id, + args.revision.as_deref(), + args.trust_remote_code, + args.huggingface_hub_cache.as_deref(), + args.weights_cache_override.as_deref(), + running.clone(), + )?; + + // Download and convert lora 
adapters if any + if let Some(lora_adapters) = &args.lora_adapters { + for adapter in lora_adapters.split(',') { + download_convert_model( + adapter, + None, + args.trust_remote_code, + args.huggingface_hub_cache.as_deref(), + args.weights_cache_override.as_deref(), + running.clone(), + )?; + } + } if !running.load(Ordering::SeqCst) { // Launcher was asked to stop From 2ce80194806f73c1b7ced1d686ce01efd3aefdc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Mon, 1 Jul 2024 12:59:12 +0200 Subject: [PATCH 12/12] Use GPTQ-Marlin for supported GPTQ configurations (#2111) GPTQ-Marlin is currently the best-performing kernel for GPTQ models. So let's use it by default if the kernels are installed, the GPU supports it, and the kernels support the configuration. For models generated by `text-generation-server quantize`, use `sym=False`. This subcommand has used asymmetric quantization since the beginning, and incorrectly reporting the model to be symmetric would cause GPTQ-Marlin to be used (which does not support asymmetric quantization).
--- .../test_flash_llama_gptq_marlin.json | 84 ----- ...st_flash_llama_gptq_marlin_all_params.json | 84 ----- .../test_flash_llama_gptq_marlin_load.json | 338 ------------------ .../models/test_flash_llama_gptq_marlin.py | 68 ---- .../layers/gptq/__init__.py | 10 + .../text_generation_server/layers/linear.py | 67 ++-- .../text_generation_server/layers/marlin.py | 15 + .../text_generation_server/utils/weights.py | 200 +++++------ 8 files changed, 144 insertions(+), 722 deletions(-) delete mode 100644 integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin.json delete mode 100644 integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_all_params.json delete mode 100644 integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_load.json delete mode 100644 integration-tests/models/test_flash_llama_gptq_marlin.py diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin.json deleted file mode 100644 index 0f99d259..00000000 --- a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": null, - "tokens": [ - { - "id": 198, - "logprob": -2.5742188, - "special": false, - "text": "\n" - }, - { - "id": 262, - "logprob": -1.6230469, - "special": false, - "text": " " - }, - { - "id": 3270, - "logprob": -2.046875, - "special": false, - "text": " \"\"\"\n" - }, - { - "id": 262, - "logprob": -0.015281677, - "special": false, - "text": " " - }, - { - "id": 422, - "logprob": 
-2.1425781, - "special": false, - "text": " if" - }, - { - "id": 1715, - "logprob": -0.9238281, - "special": false, - "text": " request" - }, - { - "id": 13204, - "logprob": -0.076660156, - "special": false, - "text": ".method" - }, - { - "id": 624, - "logprob": -0.021987915, - "special": false, - "text": " ==" - }, - { - "id": 364, - "logprob": -0.39208984, - "special": false, - "text": " '" - }, - { - "id": 3019, - "logprob": -0.10821533, - "special": false, - "text": "POST" - } - ], - "top_tokens": null - }, - "generated_text": "\n \"\"\"\n if request.method == 'POST" -} diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_all_params.json deleted file mode 100644 index 4152b5b3..00000000 --- a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_all_params.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": 0, - "tokens": [ - { - "id": 13, - "logprob": -2.2539062, - "special": false, - "text": "." 
- }, - { - "id": 578, - "logprob": -0.15563965, - "special": false, - "text": " The" - }, - { - "id": 3622, - "logprob": -0.8203125, - "special": false, - "text": " server" - }, - { - "id": 706, - "logprob": 0.0, - "special": false, - "text": " has" - }, - { - "id": 539, - "logprob": 0.0, - "special": false, - "text": " not" - }, - { - "id": 3686, - "logprob": 0.0, - "special": false, - "text": " yet" - }, - { - "id": 3288, - "logprob": 0.0, - "special": false, - "text": " sent" - }, - { - "id": 904, - "logprob": 0.0, - "special": false, - "text": " any" - }, - { - "id": 828, - "logprob": 0.0, - "special": false, - "text": " data" - }, - { - "id": 382, - "logprob": -1.5517578, - "special": false, - "text": ".\n\n" - } - ], - "top_tokens": null - }, - "generated_text": "Test request. The server has not yet sent any data.\n\n" -} diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_load.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_load.json deleted file mode 100644 index 75e90303..00000000 --- a/integration-tests/models/__snapshots__/test_flash_llama_gptq_marlin/test_flash_llama_gptq_marlin_load.json +++ /dev/null @@ -1,338 +0,0 @@ -[ - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": null, - "tokens": [ - { - "id": 198, - "logprob": -2.5742188, - "special": false, - "text": "\n" - }, - { - "id": 262, - "logprob": -1.6220703, - "special": false, - "text": " " - }, - { - "id": 3270, - "logprob": -2.0410156, - "special": false, - "text": " \"\"\"\n" - }, - { - "id": 262, - "logprob": -0.015281677, - "special": false, - "text": " " - }, - { - "id": 422, - "logprob": -2.1445312, - "special": false, - "text": " if" - }, - { - "id": 1715, - "logprob": 
-0.92333984, - "special": false, - "text": " request" - }, - { - "id": 13204, - "logprob": -0.07672119, - "special": false, - "text": ".method" - }, - { - "id": 624, - "logprob": -0.021987915, - "special": false, - "text": " ==" - }, - { - "id": 364, - "logprob": -0.39208984, - "special": false, - "text": " '" - }, - { - "id": 3019, - "logprob": -0.10638428, - "special": false, - "text": "POST" - } - ], - "top_tokens": null - }, - "generated_text": "\n \"\"\"\n if request.method == 'POST" - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": null, - "tokens": [ - { - "id": 198, - "logprob": -2.5742188, - "special": false, - "text": "\n" - }, - { - "id": 262, - "logprob": -1.6220703, - "special": false, - "text": " " - }, - { - "id": 3270, - "logprob": -2.0410156, - "special": false, - "text": " \"\"\"\n" - }, - { - "id": 262, - "logprob": -0.015281677, - "special": false, - "text": " " - }, - { - "id": 422, - "logprob": -2.1445312, - "special": false, - "text": " if" - }, - { - "id": 1715, - "logprob": -0.92333984, - "special": false, - "text": " request" - }, - { - "id": 13204, - "logprob": -0.07672119, - "special": false, - "text": ".method" - }, - { - "id": 624, - "logprob": -0.021987915, - "special": false, - "text": " ==" - }, - { - "id": 364, - "logprob": -0.39208984, - "special": false, - "text": " '" - }, - { - "id": 3019, - "logprob": -0.10638428, - "special": false, - "text": "POST" - } - ], - "top_tokens": null - }, - "generated_text": "\n \"\"\"\n if request.method == 'POST" - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": null, - 
"tokens": [ - { - "id": 198, - "logprob": -2.5742188, - "special": false, - "text": "\n" - }, - { - "id": 262, - "logprob": -1.6220703, - "special": false, - "text": " " - }, - { - "id": 3270, - "logprob": -2.0410156, - "special": false, - "text": " \"\"\"\n" - }, - { - "id": 262, - "logprob": -0.015281677, - "special": false, - "text": " " - }, - { - "id": 422, - "logprob": -2.1445312, - "special": false, - "text": " if" - }, - { - "id": 1715, - "logprob": -0.92333984, - "special": false, - "text": " request" - }, - { - "id": 13204, - "logprob": -0.07672119, - "special": false, - "text": ".method" - }, - { - "id": 624, - "logprob": -0.021987915, - "special": false, - "text": " ==" - }, - { - "id": 364, - "logprob": -0.39208984, - "special": false, - "text": " '" - }, - { - "id": 3019, - "logprob": -0.10638428, - "special": false, - "text": "POST" - } - ], - "top_tokens": null - }, - "generated_text": "\n \"\"\"\n if request.method == 'POST" - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 2323, - "logprob": null, - "text": "Test" - }, - { - "id": 1715, - "logprob": -11.34375, - "text": " request" - } - ], - "seed": null, - "tokens": [ - { - "id": 198, - "logprob": -2.5742188, - "special": false, - "text": "\n" - }, - { - "id": 262, - "logprob": -1.6220703, - "special": false, - "text": " " - }, - { - "id": 3270, - "logprob": -2.0410156, - "special": false, - "text": " \"\"\"\n" - }, - { - "id": 262, - "logprob": -0.015281677, - "special": false, - "text": " " - }, - { - "id": 422, - "logprob": -2.1445312, - "special": false, - "text": " if" - }, - { - "id": 1715, - "logprob": -0.92333984, - "special": false, - "text": " request" - }, - { - "id": 13204, - "logprob": -0.07672119, - "special": false, - "text": ".method" - }, - { - "id": 624, - "logprob": -0.021987915, - "special": false, - "text": " ==" - }, - { - "id": 364, - "logprob": -0.39208984, - "special": false, - "text": " 
'" - }, - { - "id": 3019, - "logprob": -0.10638428, - "special": false, - "text": "POST" - } - ], - "top_tokens": null - }, - "generated_text": "\n \"\"\"\n if request.method == 'POST" - } -] diff --git a/integration-tests/models/test_flash_llama_gptq_marlin.py b/integration-tests/models/test_flash_llama_gptq_marlin.py deleted file mode 100644 index 2274abce..00000000 --- a/integration-tests/models/test_flash_llama_gptq_marlin.py +++ /dev/null @@ -1,68 +0,0 @@ -import pytest - - -@pytest.fixture(scope="module") -def flash_llama_gptq_marlin_handle(launcher): - with launcher( - "astronomer/Llama-3-8B-Instruct-GPTQ-4-Bit", num_shard=2, quantize="marlin" - ) as handle: - yield handle - - -@pytest.fixture(scope="module") -async def flash_llama_gptq_marlin(flash_llama_gptq_marlin_handle): - await flash_llama_gptq_marlin_handle.health(300) - return flash_llama_gptq_marlin_handle.client - - -@pytest.mark.release -@pytest.mark.asyncio -@pytest.mark.private -async def test_flash_llama_gptq_marlin(flash_llama_gptq_marlin, response_snapshot): - response = await flash_llama_gptq_marlin.generate( - "Test request", max_new_tokens=10, decoder_input_details=True - ) - - assert response.details.generated_tokens == 10 - assert response == response_snapshot - - -@pytest.mark.release -@pytest.mark.asyncio -@pytest.mark.private -async def test_flash_llama_gptq_marlin_all_params( - flash_llama_gptq_marlin, response_snapshot -): - response = await flash_llama_gptq_marlin.generate( - "Test request", - max_new_tokens=10, - repetition_penalty=1.2, - return_full_text=True, - temperature=0.5, - top_p=0.9, - top_k=10, - truncate=5, - typical_p=0.9, - watermark=True, - decoder_input_details=True, - seed=0, - ) - - assert response.details.generated_tokens == 10 - assert response == response_snapshot - - -@pytest.mark.release -@pytest.mark.asyncio -@pytest.mark.private -async def test_flash_llama_gptq_marlin_load( - flash_llama_gptq_marlin, generate_load, response_snapshot -): - responses = await 
generate_load( - flash_llama_gptq_marlin, "Test request", max_new_tokens=10, n=4 - ) - - assert len(responses) == 4 - assert all([r.generated_text == responses[0].generated_text for r in responses]) - - assert responses == response_snapshot diff --git a/server/text_generation_server/layers/gptq/__init__.py b/server/text_generation_server/layers/gptq/__init__.py index 1172775f..56080145 100644 --- a/server/text_generation_server/layers/gptq/__init__.py +++ b/server/text_generation_server/layers/gptq/__init__.py @@ -7,6 +7,16 @@ from text_generation_server.utils.import_utils import ( ) +@dataclass +class GPTQParams: + bits: int + checkpoint_format: Optional[str] + groupsize: int + desc_act: bool + quant_method: str + sym: bool + + @dataclass class GPTQWeight: qweight: torch.Tensor diff --git a/server/text_generation_server/layers/linear.py b/server/text_generation_server/layers/linear.py index dd48465f..e94e5465 100644 --- a/server/text_generation_server/layers/linear.py +++ b/server/text_generation_server/layers/linear.py @@ -166,35 +166,45 @@ def get_linear(weight, bias, quantize): elif quantize == "gptq": from text_generation_server.layers.gptq import GPTQWeight + from text_generation_server.layers.marlin import ( + GPTQMarlinLinear, + GPTQMarlinWeight, + ) - if not isinstance(weight, GPTQWeight): + if isinstance(weight, GPTQMarlinWeight): + linear = GPTQMarlinLinear( + weight=weight, + bias=bias, + ) + elif isinstance(weight, GPTQWeight): + if weight.use_exllama: + try: + from text_generation_server.layers.gptq import ( + ExllamaQuantLinear, + ) + except ImportError: + raise NotImplementedError( + f"Exllama gptq kernels are not installed. 
Install them `cd server/exllama_kernels && python setup.py install && cd ../exllamav2_kernels && python setup.py install`" + ) + + linear = ExllamaQuantLinear(weight, bias) + else: + from text_generation_server.layers.gptq.quant_linear import QuantLinear + + linear = QuantLinear( + weight.qweight, + weight.qzeros, + weight.scales, + weight.g_idx, + bias, + weight.bits, + weight.groupsize, + ) + else: raise NotImplementedError( f"The passed weight is not `gptq` compatible, loader needs to be updated." ) - if weight.use_exllama: - try: - from text_generation_server.layers.gptq import ( - ExllamaQuantLinear, - ) - except ImportError: - raise NotImplementedError( - f"Exllama gptq kernels are not installed. Install them `cd server/exllama_kernels && python setup.py install && cd ../exllamav2_kernels && python setup.py install`" - ) - - linear = ExllamaQuantLinear(weight, bias) - else: - from text_generation_server.layers.gptq.quant_linear import QuantLinear - - linear = QuantLinear( - weight.qweight, - weight.qzeros, - weight.scales, - weight.g_idx, - bias, - weight.bits, - weight.groupsize, - ) elif quantize == "awq": from text_generation_server.layers.gptq import GPTQWeight @@ -226,18 +236,11 @@ def get_linear(weight, bias, quantize): from text_generation_server.layers.marlin import ( GPTQMarlin24Linear, GPTQMarlin24Weight, - GPTQMarlinLinear, - GPTQMarlinWeight, MarlinLinear, MarlinWeight, ) - if isinstance(weight, GPTQMarlinWeight): - linear = GPTQMarlinLinear( - weight=weight, - bias=bias, - ) - elif isinstance(weight, GPTQMarlin24Weight): + if isinstance(weight, GPTQMarlin24Weight): linear = GPTQMarlin24Linear( weight=weight, bias=bias, diff --git a/server/text_generation_server/layers/marlin.py b/server/text_generation_server/layers/marlin.py index 2207b2e4..a1af67a3 100644 --- a/server/text_generation_server/layers/marlin.py +++ b/server/text_generation_server/layers/marlin.py @@ -3,6 +3,8 @@ from typing import List, Optional, Tuple import torch import torch.nn 
as nn + +from text_generation_server.layers.gptq import GPTQParams from text_generation_server.utils.import_utils import SYSTEM try: @@ -22,6 +24,19 @@ GPTQ_MARLIN_GROUP_SIZES = [-1, 32, 64, 128] MARLIN_TILE_SIZE = 16 +def can_use_gptq_marlin(gptq_params: GPTQParams, quantize: str) -> bool: + return ( + SYSTEM == "cuda" + and marlin_kernels is not None + and has_sm_8_0 + and quantize == "gptq" + and gptq_params.quant_method == "gptq" + and gptq_params.bits in GPTQ_MARLIN_BITS + and gptq_params.groupsize in GPTQ_MARLIN_GROUP_SIZES + and gptq_params.sym + ) + + def _check_marlin_kernels(): if not (SYSTEM == "cuda" and has_sm_8_0): raise NotImplementedError( diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py index 348d215c..3731fd24 100644 --- a/server/text_generation_server/utils/weights.py +++ b/server/text_generation_server/utils/weights.py @@ -1,25 +1,15 @@ import os -from dataclasses import dataclass from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Union from safetensors import safe_open, SafetensorError import torch from loguru import logger from huggingface_hub import hf_hub_download import json +from text_generation_server.layers.gptq import GPTQParams from text_generation_server.utils.log import log_once -@dataclass -class _GPTQParams: - bits: int - checkpoint_format: Optional[str] - groupsize: int - desc_act: bool - quant_method: str - sym: bool - - class Weights: def __init__( self, @@ -212,6 +202,10 @@ class Weights: """ if quantize in ["gptq", "awq"]: from text_generation_server.layers.gptq import GPTQWeight + from text_generation_server.layers.marlin import ( + can_use_gptq_marlin, + repack_gptq_for_marlin, + ) try: qweight = self.get_packed_sharded( @@ -221,17 +215,28 @@ class Weights: raise RuntimeError( f"Cannot load `{quantize}` weight, make sure the model is already quantized." 
) - - gptq_params = self._get_gptq_params() - - qzeros = self.get_packed_sharded( - f"{prefix}.qzeros", dim=1, block_sizes=block_sizes - ) scales = self.get_packed_sharded( f"{prefix}.scales", dim=1, block_sizes=block_sizes ) scales = scales.to(dtype=self.dtype) + gptq_params = self._get_gptq_params() + if can_use_gptq_marlin(gptq_params, quantize): + g_idx = self.get_tensor(f"{prefix}.g_idx") + return repack_gptq_for_marlin( + qweight=qweight, + scales=scales, + g_idx=g_idx, + bits=gptq_params.bits, + desc_act=gptq_params.desc_act, + groupsize=gptq_params.groupsize, + sym=gptq_params.sym, + sharded_infeatures=False, + ) + + qzeros = self.get_packed_sharded( + f"{prefix}.qzeros", dim=1, block_sizes=block_sizes + ) if quantize == "gptq" and gptq_params.quant_method == "gptq": g_idx = self.get_tensor(f"{prefix}.g_idx") elif quantize == "gptq" and gptq_params.quant_method == "awq": @@ -269,7 +274,6 @@ class Weights: repack_gptq_for_marlin, ) - quant_method = getattr(self, "quant_method", "marlin") is_marlin_24 = getattr(self, "gptq_checkpoint_format", None) == "marlin_24" if is_marlin_24: B = self.get_packed_sharded( @@ -286,31 +290,6 @@ class Weights: weight = GPTQMarlin24Weight( B=B, B_meta=B_meta, s=s, bits=gptq_params.bits ) - elif quant_method == "gptq": - gptq_params = self._get_gptq_params() - try: - qweight = self.get_packed_sharded( - f"{prefix}.qweight", dim=1, block_sizes=block_sizes - ) - except RuntimeError: - raise RuntimeError( - f"Cannot load `{quantize}` weight for GPTQ -> Marlin repacking, make sure the model is already quantized" - ) - - scales = self.get_packed_sharded( - f"{prefix}.scales", dim=1, block_sizes=block_sizes - ) - g_idx = self.get_tensor(f"{prefix}.g_idx") - weight = repack_gptq_for_marlin( - qweight=qweight, - scales=scales, - g_idx=g_idx, - bits=gptq_params.bits, - desc_act=gptq_params.desc_act, - groupsize=gptq_params.groupsize, - sym=gptq_params.sym, - sharded_infeatures=False, - ) else: B = self.get_packed_sharded( f"{prefix}.B", 
dim=1, block_sizes=block_sizes @@ -356,6 +335,10 @@ class Weights: raise ValueError("get_multi_weights_col is not supported for exl2") elif quantize in ["gptq", "awq"]: from text_generation_server.layers.gptq import GPTQWeight + from text_generation_server.layers.marlin import ( + can_use_gptq_marlin, + repack_gptq_for_marlin, + ) try: qweight = torch.cat( @@ -366,14 +349,31 @@ class Weights: f"Cannot load `{quantize}` weight, make sure the model is already quantized" ) - qzeros = torch.cat( - [self.get_sharded(f"{p}.qzeros", dim=1) for p in prefixes], dim=1 - ) scales = torch.cat( [self.get_sharded(f"{p}.scales", dim=1) for p in prefixes], dim=1 ) gptq_params = self._get_gptq_params() + if can_use_gptq_marlin(gptq_params, quantize): + w = [self.get_tensor(f"{p}.g_idx") for p in prefixes] + for w2 in w[1:]: + torch.testing.assert_close(w2, w[0]) + g_idx = w[0] + + return repack_gptq_for_marlin( + qweight=qweight, + scales=scales, + g_idx=g_idx, + bits=gptq_params.bits, + desc_act=gptq_params.desc_act, + groupsize=gptq_params.groupsize, + sym=gptq_params.sym, + sharded_infeatures=False, + ) + + qzeros = torch.cat( + [self.get_sharded(f"{p}.qzeros", dim=1) for p in prefixes], dim=1 + ) from text_generation_server.layers.gptq import HAS_EXLLAMA @@ -425,10 +425,8 @@ class Weights: from text_generation_server.layers.marlin import ( GPTQMarlin24Weight, MarlinWeight, - repack_gptq_for_marlin, ) - quant_method = getattr(self, "quant_method", "marlin") is_marlin_24 = getattr(self, "gptq_checkpoint_format", None) == "marlin_24" if is_marlin_24: try: @@ -452,36 +450,6 @@ class Weights: weight = GPTQMarlin24Weight( B=B, B_meta=B_meta, s=s, bits=gptq_params.bits ) - elif quant_method == "gptq": - gptq_params = self._get_gptq_params() - try: - qweight = torch.cat( - [self.get_sharded(f"{p}.qweight", dim=1) for p in prefixes], - dim=1, - ) - except RuntimeError: - raise RuntimeError( - f"Cannot load `{quantize}` weight for GPTQ -> Marlin repacking, make sure the model is already 
quantized" - ) - - scales = torch.cat( - [self.get_sharded(f"{p}.scales", dim=1) for p in prefixes], dim=1 - ) - w = [self.get_tensor(f"{p}.g_idx") for p in prefixes] - for w2 in w[1:]: - torch.testing.assert_close(w2, w[0]) - g_idx = w[0] - - weight = repack_gptq_for_marlin( - qweight=qweight, - scales=scales, - g_idx=g_idx, - bits=gptq_params.bits, - desc_act=gptq_params.desc_act, - groupsize=gptq_params.groupsize, - sym=gptq_params.sym, - sharded_infeatures=False, - ) else: try: B = torch.cat( @@ -544,9 +512,41 @@ class Weights: ) elif quantize == "gptq": - use_exllama = True - gptq_params = self._get_gptq_params() + from text_generation_server.layers.marlin import ( + can_use_gptq_marlin, + repack_gptq_for_marlin, + ) + gptq_params = self._get_gptq_params() + if can_use_gptq_marlin(gptq_params, quantize): + log_once(logger.info, "Using GPTQ-Marlin kernels") + try: + qweight = self.get_sharded(f"{prefix}.qweight", dim=0) + except RuntimeError: + raise RuntimeError( + f"Cannot load `{quantize}` weight for GPTQ -> Marlin repacking, make sure the model is already quantized" + ) + + g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0) + if gptq_params.desc_act or gptq_params.groupsize == -1: + scales = self.get_tensor(f"{prefix}.scales") + else: + scales = self.get_sharded(f"{prefix}.scales", dim=0) + + sharded_in_features = self.process_group.size() > 1 + + return repack_gptq_for_marlin( + qweight=qweight, + scales=scales, + g_idx=g_idx, + bits=gptq_params.bits, + desc_act=gptq_params.desc_act, + groupsize=gptq_params.groupsize, + sym=gptq_params.sym, + sharded_infeatures=sharded_in_features, + ) + + use_exllama = True if gptq_params.bits != 4: use_exllama = False @@ -672,10 +672,8 @@ class Weights: from text_generation_server.layers.marlin import ( GPTQMarlin24Weight, MarlinWeight, - repack_gptq_for_marlin, ) - quant_method = getattr(self, "quant_method", "marlin") is_marlin_24 = getattr(self, "gptq_checkpoint_format", None) == "marlin_24" if is_marlin_24: try: @@ 
-698,35 +696,6 @@ class Weights: weight = GPTQMarlin24Weight( B=B, B_meta=B_meta, s=s, bits=gptq_params.bits ) - elif quant_method == "gptq": - log_once(logger.info, "Converting GPTQ model to Marlin packing format.") - gptq_params = self._get_gptq_params() - - try: - qweight = self.get_sharded(f"{prefix}.qweight", dim=0) - except RuntimeError: - raise RuntimeError( - f"Cannot load `{quantize}` weight for GPTQ -> Marlin repacking, make sure the model is already quantized" - ) - - g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0) - if gptq_params.desc_act or gptq_params.groupsize == -1: - scales = self.get_tensor(f"{prefix}.scales") - else: - scales = self.get_sharded(f"{prefix}.scales", dim=0) - - sharded_in_features = self.process_group.size() > 1 - - weight = repack_gptq_for_marlin( - qweight=qweight, - scales=scales, - g_idx=g_idx, - bits=gptq_params.bits, - desc_act=gptq_params.desc_act, - groupsize=gptq_params.groupsize, - sym=gptq_params.sym, - sharded_infeatures=sharded_in_features, - ) else: try: B = self.get_sharded(f"{prefix}.B", dim=0) @@ -743,18 +712,17 @@ class Weights: else: s = self.get_sharded(f"{prefix}.s", dim=0) weight = MarlinWeight(B=B, s=s) - else: weight = self.get_sharded(f"{prefix}.weight", dim=1) return weight - def _get_gptq_params(self) -> _GPTQParams: + def _get_gptq_params(self) -> GPTQParams: try: bits = self.get_tensor("gptq_bits").item() groupsize = self.get_tensor("gptq_groupsize").item() checkpoint_format = getattr(self, "gptq_checkpoint_format", None) desc_act = False - sym = True + sym = False quant_method = "gptq" except (SafetensorError, RuntimeError) as e: try: @@ -767,7 +735,7 @@ class Weights: except Exception: raise e - return _GPTQParams( + return GPTQParams( bits=bits, checkpoint_format=checkpoint_format, desc_act=desc_act,