mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
Merge branch 'main' into feature/get-trace-id-from-req-headers
This commit is contained in:
commit
e4d7a6788e
2
.github/workflows/build.yaml
vendored
2
.github/workflows/build.yaml
vendored
@ -137,7 +137,7 @@ jobs:
|
||||
uses: docker/metadata-action@v4.3.0
|
||||
with:
|
||||
flavor: |
|
||||
latest=auto
|
||||
latest=false
|
||||
images: |
|
||||
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
||||
ghcr.io/huggingface/text-generation-inference
|
||||
|
@ -4,6 +4,7 @@ repos:
|
||||
hooks:
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
exclude: crate-hashes.json
|
||||
- id: trailing-whitespace
|
||||
exclude: docs/source/reference/launcher.md
|
||||
- repo: https://github.com/psf/black
|
||||
|
38
Cargo.lock
generated
38
Cargo.lock
generated
@ -3005,6 +3005,17 @@ dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "outlines-core"
|
||||
version = "0.1.0"
|
||||
source = "git+https://github.com/dottxt-ai/outlines-core.git?rev=ba10c619fc9bf3c487e43f49bdecb95a24bb465c#ba10c619fc9bf3c487e43f49bdecb95a24bb465c"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"regex",
|
||||
"serde-pyobject",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
@ -3952,6 +3963,16 @@ dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde-pyobject"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca4b0aad8b225845739a0030a0d5cc2ae949c56a86a7daf9226c7df7c2016d16"
|
||||
dependencies = [
|
||||
"pyo3",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_cbor"
|
||||
version = "0.11.2"
|
||||
@ -3979,6 +4000,7 @@ version = "1.0.133"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
|
||||
dependencies = [
|
||||
"indexmap 2.6.0",
|
||||
"itoa",
|
||||
"memchr",
|
||||
"ryu",
|
||||
@ -4345,7 +4367,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-backends-trtllm"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@ -4369,7 +4391,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-benchmark"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"average",
|
||||
"clap 4.5.21",
|
||||
@ -4389,7 +4411,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-client"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"base64 0.22.1",
|
||||
@ -4407,7 +4429,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-launcher"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"clap 4.5.21",
|
||||
"ctrlc",
|
||||
@ -4428,8 +4450,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-router"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum 0.7.9",
|
||||
@ -4453,6 +4476,7 @@ dependencies = [
|
||||
"once_cell",
|
||||
"opentelemetry 0.20.0",
|
||||
"opentelemetry-otlp",
|
||||
"outlines-core",
|
||||
"pyo3",
|
||||
"rand",
|
||||
"regex",
|
||||
@ -4477,7 +4501,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-router-v2"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
@ -4526,7 +4550,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "text-generation-router-v3"
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.1-dev0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
|
@ -20,7 +20,7 @@ default-members = [
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "2.4.2-dev0"
|
||||
version = "3.0.2-dev0"
|
||||
edition = "2021"
|
||||
authors = ["Olivier Dehaene"]
|
||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||
|
@ -45,7 +45,7 @@ RUN cargo build --profile release-opt --frozen
|
||||
|
||||
# Text Generation Inference base image for Intel
|
||||
|
||||
FROM intel/intel-extension-for-pytorch:2.3.110-xpu AS xpu
|
||||
FROM intel/oneapi-basekit:2024.2.1-0-devel-ubuntu22.04 AS xpu
|
||||
|
||||
USER root
|
||||
|
||||
@ -87,7 +87,7 @@ RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https:/
|
||||
|
||||
RUN mv /tmp/intel-for-pytorch-gpu-dev.list /etc/apt/sources.list.d
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y intel-basekit=2024.2.1-98 xpu-smi cmake ninja-build pciutils intel-pti-dev-0.9
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt install -y xpu-smi cmake ninja-build pciutils intel-pti-dev-0.9
|
||||
|
||||
# Text Generation Inference base env
|
||||
ENV HF_HOME=/data \
|
||||
@ -114,15 +114,8 @@ RUN cd server && \
|
||||
pip install -r requirements_intel.txt && \
|
||||
pip install ".[accelerate, compressed-tensors, peft, outlines]" --no-cache-dir
|
||||
|
||||
ENV CCL_ROOT=/opt/intel/oneapi/ccl/latest
|
||||
ENV I_MPI_ROOT=/opt/intel/oneapi/mpi/latest
|
||||
ENV FI_PROVIDER_PATH=/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib/prov:/usr/lib/x86_64-linux-gnu/libfabric
|
||||
ENV LIBRARY_PATH=/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mkl/latest/lib/:/opt/intel/oneapi/compiler/latest/lib
|
||||
ENV LD_LIBRARY_PATH=/opt/intel/oneapi/ccl/latest/lib/:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/lib:/opt/intel/oneapi/mpi/latest/lib:/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/compiler/latest/opt/compiler/lib:/opt/intel/oneapi/compiler/latest/lib:/opt/intel/oneapi/lib:/opt/intel/oneapi/lib/intel64:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
|
||||
ENV PATH=/opt/conda/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mpi/latest/bin:/opt/intel/oneapi/mpi/latest/opt/mpi/libfabric/bin:/opt/intel/oneapi/mkl/latest/bin/:/opt/intel/oneapi/compiler/latest/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/pti/0.9/lib:/opt/conda/lib
|
||||
ENV CCL_ZE_IPC_EXCHANGE=sockets
|
||||
ENV CMAKE_PREFIX_PATH=/opt/intel/oneapi/mkl/latest/lib/cmake:/opt/intel/oneapi/compiler/latest
|
||||
ENV CPATH=/opt/intel/oneapi/mpi/latest/include:/opt/intel/oneapi/ccl/latest/include:/opt/intel/oneapi/mkl/latest/include
|
||||
#ENV TORCH_LLM_ALLREDUCE=1
|
||||
#ENV CCL_TOPO_FABRIC_VERTEX_CONNECTION_CHECK=0
|
||||
|
||||
@ -197,9 +190,10 @@ RUN pip install triton py-libnuma
|
||||
|
||||
WORKDIR /usr/src
|
||||
|
||||
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout 2e1c98f74ec1b35ad8dd1ebe7dd4b25470f2fd41
|
||||
RUN git clone https://github.com/intel/intel-extension-for-pytorch && cd intel-extension-for-pytorch && git checkout b7b552baf64283b594665b8687430fe92990e497
|
||||
RUN git clone https://github.com/intel/torch-ccl.git && cd torch-ccl && git checkout v2.4.0+cpu+rc0
|
||||
|
||||
RUN sed -i 's/VERSION_MINOR 6/VERSION_MINOR 5/' intel-extension-for-pytorch/version.txt
|
||||
RUN cd intel-extension-for-pytorch && git submodule sync && git submodule update --init --recursive && python setup.py install
|
||||
|
||||
RUN cd torch-ccl && git submodule sync && git submodule update --init --recursive && pip install .
|
||||
|
22
README.md
22
README.md
@ -84,7 +84,7 @@ model=HuggingFaceH4/zephyr-7b-beta
|
||||
volume=$PWD/data
|
||||
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model
|
||||
3.0.0 ghcr.io/huggingface/text-generation-inference:3.0.0 --model-id $model
|
||||
```
|
||||
|
||||
And then you can make requests like
|
||||
@ -121,7 +121,7 @@ curl localhost:8080/v1/chat/completions \
|
||||
|
||||
**Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
|
||||
|
||||
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1-rocm --model-id $model` instead of the command above.
|
||||
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/installation_amd#using-tgi-with-amd-gpus). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.0-rocm --model-id $model` instead of the command above.
|
||||
|
||||
To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
|
||||
```
|
||||
@ -151,7 +151,7 @@ model=meta-llama/Meta-Llama-3.1-8B-Instruct
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
token=<your cli READ token>
|
||||
|
||||
docker run --gpus all --shm-size 1g -e HF_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model
|
||||
docker run --gpus all --shm-size 1g -e HF_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.0 --model-id $model
|
||||
```
|
||||
|
||||
### A note on Shared Memory (shm)
|
||||
@ -196,14 +196,26 @@ Detailed blogpost by Adyen on TGI inner workings: [LLM inference at scale with T
|
||||
|
||||
You can also opt to install `text-generation-inference` locally.
|
||||
|
||||
First [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
|
||||
Python 3.9, e.g. using `conda`:
|
||||
First clone the repository and change directory into it:
|
||||
|
||||
```shell
|
||||
git clone https://github.com/huggingface/text-generation-inference
|
||||
cd text-generation-inference
|
||||
```
|
||||
|
||||
Then [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
|
||||
Python 3.9, e.g. using `conda` or `python venv`:
|
||||
|
||||
```shell
|
||||
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
|
||||
|
||||
#using conda
|
||||
conda create -n text-generation-inference python=3.11
|
||||
conda activate text-generation-inference
|
||||
|
||||
#using python venv
|
||||
python3 -m venv .venv
|
||||
source .venv/bin/activate
|
||||
```
|
||||
|
||||
You may also need to install Protoc.
|
||||
|
BIN
assets/v3_benchmarks.png
Normal file
BIN
assets/v3_benchmarks.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 209 KiB |
@ -104,6 +104,10 @@ impl Backend for BackendV2 {
|
||||
}
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
fn start_health(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Batching logic
|
||||
|
@ -436,6 +436,7 @@ mod tests {
|
||||
stopping_parameters: ValidStoppingParameters {
|
||||
ignore_eos_token: false,
|
||||
max_new_tokens: 1,
|
||||
max_total_new_tokens: 1024,
|
||||
stop_sequences: vec![],
|
||||
},
|
||||
top_n_tokens: 0,
|
||||
|
@ -111,6 +111,10 @@ impl Backend for BackendV3 {
|
||||
}
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
fn start_health(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Batching logic
|
||||
|
@ -217,8 +217,8 @@ impl Health for ShardedClient {
|
||||
input_chunks: Some(Input {
|
||||
chunks: vec![Chunk::Text("liveness".into()).into()],
|
||||
}),
|
||||
truncate: 10,
|
||||
add_special_tokens: true,
|
||||
truncate: 1,
|
||||
add_special_tokens: false,
|
||||
prefill_logprobs: false,
|
||||
parameters: Some(NextTokenChooserParameters {
|
||||
temperature: 1.0,
|
||||
@ -241,7 +241,7 @@ impl Health for ShardedClient {
|
||||
top_n_tokens: 0,
|
||||
// Block 0 is reserved for health checks
|
||||
blocks: vec![0],
|
||||
slots: (0..16).collect(),
|
||||
slots: vec![0],
|
||||
cache_len: 0,
|
||||
adapter_id: None,
|
||||
chunk_len: None,
|
||||
|
@ -573,6 +573,7 @@ mod tests {
|
||||
stopping_parameters: ValidStoppingParameters {
|
||||
ignore_eos_token: false,
|
||||
max_new_tokens: 1,
|
||||
max_total_new_tokens: 1024,
|
||||
stop_sequences: vec![],
|
||||
},
|
||||
top_n_tokens: 0,
|
||||
|
3
crate-hashes.json
Normal file
3
crate-hashes.json
Normal file
@ -0,0 +1,3 @@
|
||||
{
|
||||
"git+https://github.com/dottxt-ai/outlines-core.git?rev=ba10c619fc9bf3c487e43f49bdecb95a24bb465c#outlines-core@0.1.0": "1j9dcd831b0bmmjk2n4aag3x47qnqmkpg4gqpvwwyic7744llbfm"
|
||||
}
|
@ -10,7 +10,7 @@
|
||||
"name": "Apache 2.0",
|
||||
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
||||
},
|
||||
"version": "2.4.2-dev0"
|
||||
"version": "3.0.1-dev0"
|
||||
},
|
||||
"paths": {
|
||||
"/": {
|
||||
@ -1013,6 +1013,7 @@
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"description": "The maximum number of tokens that can be generated in the chat completion.",
|
||||
"default": "1024",
|
||||
"example": "32",
|
||||
"nullable": true,
|
||||
"minimum": 0
|
||||
@ -1329,7 +1330,8 @@
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"description": "The maximum number of tokens that can be generated in the chat completion.",
|
||||
"default": "32",
|
||||
"default": "1024",
|
||||
"example": "32",
|
||||
"nullable": true,
|
||||
"minimum": 0
|
||||
},
|
||||
@ -1591,7 +1593,7 @@
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"description": "Maximum number of tokens to generate.",
|
||||
"default": "100",
|
||||
"default": "1024",
|
||||
"example": "20",
|
||||
"nullable": true,
|
||||
"minimum": 0
|
||||
|
@ -54,6 +54,8 @@
|
||||
title: API Reference
|
||||
title: Reference
|
||||
- sections:
|
||||
- local: conceptual/chunking
|
||||
title: V3 update, caching and chunking
|
||||
- local: conceptual/streaming
|
||||
title: Streaming
|
||||
- local: conceptual/quantization
|
||||
|
@ -19,6 +19,6 @@ docker run --gpus all \
|
||||
--shm-size 1g \
|
||||
-e HF_TOKEN=$token \
|
||||
-p 8080:80 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.1 \
|
||||
--model-id $model
|
||||
```
|
||||
|
125
docs/source/conceptual/chunking.md
Normal file
125
docs/source/conceptual/chunking.md
Normal file
@ -0,0 +1,125 @@
|
||||
# TGI v3 overview
|
||||
## Summary
|
||||
|
||||
|
||||
Performance leap: TGI processes 3x more tokens, 13x faster than vLLM on long prompts. Zero config !
|
||||
|
||||
### 3x more tokens.
|
||||
By reducing our memory footprint, we’re able to ingest many more tokens and more dynamically than before. A single L4 (24GB) can handle 30k tokens on llama 3.1-8B, while vLLM gets barely 10k. A lot of work went into reducing the footprint of the runtime and its effects are best seen on smaller constrained environments.
|
||||
|
||||
### 13x faster
|
||||
On long prompts (200k+ tokens) conversation replies take 27.5s in vLLM, while it takes only 2s in TGI. How so ? We keep the initial conversation around, so when a new reply comes in, we can answer almost instantly. The overhead of the lookup is ~5us. Thanks @Daniël de Kok for the beast data structure.
|
||||
|
||||
### Zero config
|
||||
That’s it. Remove all the flags you are using and you’re likely to get the best performance. By evaluating the hardware and model, TGI carefully selects automatic values to give best performance. In production, we don’t have any flags anymore in our deployments. We kept all existing flags around, they may come in handy in niche scenarios.
|
||||
|
||||
|
||||
|
||||
## Benchmarks
|
||||
|
||||
### Methodology
|
||||
|
||||
To ensure accurate and reliable results, we employed a robust benchmarking protocol that addresses common pitfalls in performance evaluation. Specifically:
|
||||
|
||||
1. **Consistent Code**: We used the same codebase to run against different engines, ensuring that any performance differences are attributable to the LLM itself, rather than variations in the testing framework.
|
||||
2. **Request-Based Measurement**: Instead of measuring Requests Per Second (RPS) by sending as many requests as possible, we opted for a more consistent approach, sending a fixed number of requests and measuring the time it takes for the server to complete all of them. This method avoids boundary effects and provides a more accurate representation of performance.
|
||||
3. **Realistic Combinations**: We selected realistic combinations of LLMs and hardware configurations so we used 8xH100 for a 70B, not a 8B, which would be a waste of money.
|
||||
4. **Realistic scenarios** We benchmarked engines with prefix caching on, so we are reporting the results of the 2nd run, not the first one.
|
||||
During the first run of a benchmark, every request is new, so prefix caching is not working, masking the real world benefits of using it.
|
||||
|
||||
Note: Boundary effect is when the benchmarks are flaky because their results depend on fine details of the engine being benchmarked.
|
||||
For instance, a system ingesting a constant 10RPS, but receiving in the benchmark a single final request at -0.1s before the end of the benchmark, and that single request takes a full 10s to process. Then a benchmark taking 30s would measure 7.5RPS instead of the expected 10, because that single query isn't being parallelized with others. Another very slightly slower engine would receive that request at +0.1s which would get discarded by the benchmark and therefore measure the slower system as being faster.
|
||||
|
||||
For more details on benchmarking in general we recommend the documentation of k6: https://grafana.com/docs/k6/latest/.
|
||||
|
||||
### Scenarios
|
||||
|
||||
We selected a handful of scenarios to simplify the picture, they seem to accurately reflect a larger trend.
|
||||
|
||||
1. **Small scenario**: This scenario consists of the first 200 requests from the orca datasets being prompted to the model. The 200 requests total 8k tokens together and are representative of conversation starters. Prefix caching has very limited impact in that scenario and we feel it's a relatively balanced benchmark for simple use cases.
|
||||
2. **Long scenario**: This scenario consists of 20 requests totalling 200k prompt tokens which are essentially asking for summaries of large chunks of text. In practical scenarios this is really useful when you are feeding large chunks of code, large chunks of business data or documents repeatedly and ask simple questions about them (summarization, classification, or where to find some data). This scenario is the one closest to what a lot of professional use cases seem to be doing by including a lot of information in the prompt itself. Those very long conversations are the ones that benefit the most from our recent changes since we enable ever larger prompts and ever faster caching.
|
||||
|
||||
### Hardware
|
||||
|
||||
1. `L4` : This is a single L4 (24GB) which represents small or even home compute capabilities. We tested `meta-llama/Meta-Llama-3.1-8B-Instruct` on it.
|
||||
2. `4xL4`: This is a more beefy deployment usually used for either very large requests deployments for 8B models (the ones under test) or it can also easily handle all 30GB models. For this benchmark we tested `meta-llama/Meta-Llama-3.1-8B-Instruct`
|
||||
3. `8xH100` This is one of the beefiest deployments possible. We tested `meta-llama/Meta-Llama-3.1-70B-Instruct` as it's the most representative models of this size. Llama 3.3 wasn't released at the time of benchmarking (it's the exact same model so it doesn't make any difference).
|
||||
|
||||
|
||||
### Replicating the results
|
||||
|
||||
|
||||
|
||||
The commands to run the benchmarks are as follows:
|
||||
|
||||
1. Prepare the datasets:
|
||||
|
||||
```bash
|
||||
cd text-generation-inference/load_tests
|
||||
make prepare_orca
|
||||
python long.py
|
||||
```
|
||||
|
||||
2. Launch the engine:
|
||||
|
||||
TGI: `text-generation-launcher --model-id $MODEL_ID --num-shard $N --port 8000` (or docker variant)
|
||||
vLLM: `vllm serve $MODEL_ID --tensor-parallel $N --enable-prefix-caching` (or docker variant)
|
||||
|
||||
3. Start scenario:
|
||||
Small: `MODEL_ID=$MODEL_ID HOST=localhost:8000 k6 run load_tests/common.js`
|
||||
Long: `MODEL_ID=$MODEL_ID HOST=localhost:8000 k6 run load_tests/long.js`
|
||||
|
||||
|
||||
### Results
|
||||
|
||||

|
||||
|
||||
Our benchmarking results show significant performance gains, with a 13x speedup over vLLM with prefix caching, and up to 30x speedup without prefix caching. These results are consistent with our production data and demonstrate the effectiveness of our optimized LLM architecture.
|
||||
|
||||
Raw results
|
||||
|
||||
| | | | | |
|
||||
|---|---|---|---|---|
|
||||
|2nd run ||**TGI v3** (time in s)|**vLLM** (s)|**Amount of req**|
|
||||
|**Llama 3.1 8b**|Small test - L4 - 8B|17.5|19.9|200|
|
||||
|**Llama 3.1 8b**|Long test* - L4 - 8B|53|57|10|
|
||||
|**Llama 3.1 8b**|Small test - 4xL4 - 8B|4.8|6|200|
|
||||
|**Llama 3.1 8b**|Long test - 4xL4 - 8B|3.2|12.5|20|
|
||||
|**Llama 3.1 70b**|Small test - 8XH100 - 70B|6.2|7.4|200|
|
||||
|**Llama 3.1 70b**|Long test - 8H100 - 70B|2|27.5|20|
|
||||
||||||
|
||||
|1st run ||TGI (s)|vLLM (s)|Amount of req|
|
||||
|**Llama 3.1 8b**|Small test - L4|19.9|19.9|200|
|
||||
|**Llama 3.1 8b**|Long test (10) - L4|49.8|55|10|
|
||||
|**Llama 3.1 8b**|Small test - 4xL4|13|12.6|200|
|
||||
|**Llama 3.1 8b**|Long test - 4xL4|47|50.3|20|
|
||||
|**Llama 3.1 70b**|Small test - 8XH100|7.5|7.6|200|
|
||||
|**Llama 3.1 70b**|Long test - 8H100|12.1|28.3|20|
|
||||
|
||||
|
||||
### Caveats and Limitations
|
||||
|
||||
While our results are promising, there are some caveats to consider:
|
||||
|
||||
1. **Constrained kv-cache**: If a deployment lacks kv-cache space, that means that many queries will require the same slots of kv-cache, leading to contention in the kv-cache. You can limit that effect by limiting `--max-total-tokens` to reduce individual queries impact. You can also use more GPUs or larger GPUs in order to increase the size of the kv-cache.
|
||||
2. **Replication**: In scenarios where multiple replicas are behind a single endpoint, there's no reason for every query from a particular user to hit the same replica, therefore the cache will not be present, meaning no speed benefit. You can use sticky sessions load balancing to force every user to send their requests on the same replica. Do not apply this blindly, it's possible this may not be necessary at all.
|
||||
|
||||
## Technical Insights
|
||||
|
||||
Our performance gains can be attributed to several key factors:
|
||||
|
||||
1. **New Kernels**: Our custom kernels, including `flashinfer` and `flashdecoding`, offer improved performance at large prompt lengths and enable more efficient scheduling.
|
||||
2. **Prefix Caching**: Our optimized prefix caching structure allows for fast query matching, even for long prompts. The overhead is roughly 6us.
|
||||
3. **Chunking Code**: Our chunking code enables finer control over compute resources, ensuring optimal performance and reduced VRAM usage.
|
||||
4. **Kernel Optimizations**: We've implemented various other kernel optimizations, including better kernel selection. Notably we've implemented several small kernels involved in the queries bookkeeping which are particularly efficient on small models. Every kernel launch has an overhead of several milliseconds so fusing them together increases a lot performance when this bookkeeping is important relative to the raw model calculations. This happens typically on oversized compute for a particular model and particularly small models.
|
||||
5. **VRAM efficiency**: In the realm of very large requests (100k+ tokens) there are a lot of places which start becoming big memory consumers. We've hunted the biggest ones and found ways to reduce/reuse or delete them. The biggest culprit probably is `logits` calculation. Logits for llama 3.1-8b take 25.6GB (=100k tokens * 128k vocabulary * 2(f16)) which is more than the full model which is 16GB. The thing is that in general we do not need every prompt logits, so we simply removed them and removed them from being potentially asked by users by default. We think this is ok since they are mostly used by researchers. You can enable your deployments to have them again by using the `--enable-prefill-logprobs` flag, but you will experience reduced token prompt size.
|
||||
|
||||
## Future Directions
|
||||
|
||||
While we've made significant progress, there are still opportunities for improvement:
|
||||
|
||||
1. **Special models**: All LLMs come with the aforementioned improvements. Some specific set of features might not (some quantizations, speculation or VLMs for instance are harder to optimize for with the same level of detail).
|
||||
2. **KV-Cache Long-Term Retention**: Addressing KV-cache long-term retention is a challenge. There are several solutions envisioned, like shared KV-cache (like redis or memcached) solutions or innovative storage approaches. It is an area of ongoing research of ours.
|
||||
3. **Multimodal models**: We are also investigating quite a lot other kind of models, like audio-to-audio, image/video generation, and other hybrids, where we see a lot of potential of applying the same principles we've applied in TGI to maximize performance.
|
||||
|
||||
By sharing our benchmarking methodology, results, and technical insights, we aim to contribute to the ongoing development of more efficient and effective LLMs.
|
@ -19,7 +19,7 @@ bitsandbytes is a library used to apply 8-bit and 4-bit quantization to models.
|
||||
In TGI, you can use 8-bit quantization by adding `--quantize bitsandbytes` like below 👇
|
||||
|
||||
```bash
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model --quantize bitsandbytes
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.1 --model-id $model --quantize bitsandbytes
|
||||
```
|
||||
|
||||
4-bit quantization is also possible with bitsandbytes. You can choose one of the following 4-bit data types: 4-bit float (`fp4`), or 4-bit `NormalFloat` (`nf4`). These data types were introduced in the context of parameter-efficient fine-tuning, but you can apply them for inference by automatically converting the model weights on load.
|
||||
@ -27,7 +27,7 @@ docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingf
|
||||
In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇
|
||||
|
||||
```bash
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model --quantize bitsandbytes-nf4
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.1 --model-id $model --quantize bitsandbytes-nf4
|
||||
```
|
||||
|
||||
You can get more information about 8-bit quantization by reading this [blog post](https://huggingface.co/blog/hf-bitsandbytes-integration), and 4-bit quantization by reading [this blog post](https://huggingface.co/blog/4bit-transformers-bitsandbytes).
|
||||
@ -48,7 +48,7 @@ $$({\hat{W}_{l}}^{*} = argmin_{\hat{W_{l}}} ||W_{l}X-\hat{W}_{l}X||^{2}_{2})$$
|
||||
TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using quantization script. You can run a quantized model by simply passing --quantize like below 👇
|
||||
|
||||
```bash
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.4.1 --model-id $model --quantize gptq
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:3.0.1 --model-id $model --quantize gptq
|
||||
```
|
||||
|
||||
Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI.
|
||||
|
@ -19,7 +19,7 @@ Text Generation Inference implements many optimizations and features, such as:
|
||||
- Stop sequences
|
||||
- Log probabilities
|
||||
- Fine-tuning Support: Utilize fine-tuned models for specific tasks to achieve higher accuracy and performance.
|
||||
- [Guidance](../conceptual/guidance): Enable function calling and tool-use by forcing the model to generate structured outputs based on your own predefined output schemas.
|
||||
- [Guidance](conceptual/guidance): Enable function calling and tool-use by forcing the model to generate structured outputs based on your own predefined output schemas.
|
||||
|
||||
Text Generation Inference is used in production by multiple projects, such as:
|
||||
|
||||
|
@ -11,7 +11,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
|
||||
docker run --rm -it --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
|
||||
--device=/dev/kfd --device=/dev/dri --group-add video \
|
||||
--ipc=host --shm-size 256g --net host -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1-rocm \
|
||||
ghcr.io/huggingface/text-generation-inference:3.0.1-rocm \
|
||||
--model-id $model
|
||||
```
|
||||
|
||||
|
@ -12,7 +12,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
|
||||
docker run --rm --privileged --cap-add=sys_nice \
|
||||
--device=/dev/dri \
|
||||
--ipc=host --shm-size 1g --net host -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1-intel-xpu \
|
||||
ghcr.io/huggingface/text-generation-inference:3.0.1-intel-xpu \
|
||||
--model-id $model --cuda-graphs 0
|
||||
```
|
||||
|
||||
@ -29,7 +29,7 @@ volume=$PWD/data # share a volume with the Docker container to avoid downloading
|
||||
docker run --rm --privileged --cap-add=sys_nice \
|
||||
--device=/dev/dri \
|
||||
--ipc=host --shm-size 1g --net host -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1-intel-cpu \
|
||||
ghcr.io/huggingface/text-generation-inference:3.0.1-intel-cpu \
|
||||
--model-id $model --cuda-graphs 0
|
||||
```
|
||||
|
||||
|
@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
|
||||
docker run --gpus all --shm-size 64g -p 8080:80 -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1 \
|
||||
ghcr.io/huggingface/text-generation-inference:3.0.1 \
|
||||
--model-id $model
|
||||
```
|
||||
|
||||
|
@ -11,7 +11,7 @@ model=teknium/OpenHermes-2.5-Mistral-7B
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:2.4.1 \
|
||||
ghcr.io/huggingface/text-generation-inference:3.0.1 \
|
||||
--model-id $model
|
||||
```
|
||||
|
||||
@ -96,7 +96,7 @@ curl 127.0.0.1:8080/generate \
|
||||
To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
|
||||
|
||||
```bash
|
||||
docker run ghcr.io/huggingface/text-generation-inference:2.4.1 --help
|
||||
docker run ghcr.io/huggingface/text-generation-inference:3.0.1 --help
|
||||
```
|
||||
|
||||
</Tip>
|
||||
|
@ -163,7 +163,7 @@ hub = {
|
||||
|
||||
# create Hugging Face Model Class
|
||||
huggingface_model = HuggingFaceModel(
|
||||
image_uri=get_huggingface_llm_image_uri("huggingface",version="2.4.1"),
|
||||
image_uri=get_huggingface_llm_image_uri("huggingface",version="3.0.1"),
|
||||
env=hub,
|
||||
role=role,
|
||||
)
|
||||
|
@ -467,6 +467,16 @@ Options:
|
||||
[env: PAYLOAD_LIMIT=]
|
||||
[default: 2000000]
|
||||
|
||||
```
|
||||
## ENABLE_PREFILL_LOGPROBS
|
||||
```shell
|
||||
--enable-prefill-logprobs
|
||||
Enables prefill logprobs
|
||||
|
||||
Logprobs in the prompt are deactivated by default because they consume a large amount of VRAM (especially for long prompts). Using this flag reallows users to ask for them.
|
||||
|
||||
[env: ENABLE_PREFILL_LOGPROBS=]
|
||||
|
||||
```
|
||||
## HELP
|
||||
```shell
|
||||
|
@ -43,7 +43,7 @@ If the above list lacks the model you would like to serve, depending on the mode
|
||||
|
||||
```python
|
||||
# for causal LMs/text-generation models
|
||||
AutoModelForCausalLM.from_pretrained(<model>, device_map="auto")`
|
||||
AutoModelForCausalLM.from_pretrained(<model>, device_map="auto")
|
||||
# or, for text-to-text generation models
|
||||
AutoModelForSeq2SeqLM.from_pretrained(<model>, device_map="auto")
|
||||
```
|
||||
|
@ -148,6 +148,8 @@
|
||||
};
|
||||
|
||||
packages = rec {
|
||||
inherit server;
|
||||
|
||||
default = pkgs.writeShellApplication {
|
||||
name = "text-generation-inference";
|
||||
runtimeInputs = [
|
||||
|
22
integration-tests/models/__snapshots__/test.py
Normal file
22
integration-tests/models/__snapshots__/test.py
Normal file
@ -0,0 +1,22 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
for root, dirs, files in os.walk("."):
|
||||
for filename in files:
|
||||
if filename.endswith(".json"):
|
||||
with open(os.path.join(root, filename), "r") as f:
|
||||
data = json.load(f)
|
||||
|
||||
print(os.path.join(root, filename))
|
||||
try:
|
||||
if filename.endswith("_load.json"):
|
||||
for i in range(len(data)):
|
||||
data[i]["details"]["prefill"] = []
|
||||
else:
|
||||
data["details"]["prefill"] = []
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
with open(os.path.join(root, filename), "w") as f:
|
||||
json.dump(data, f, indent=2, ensure_ascii=False)
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.4902344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.4902344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.4902344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.4902344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.3867188,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -1.1318359,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.6875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.3007812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.4902344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.5,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,28 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,33 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.5,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -103,33 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.5,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -202,33 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.5,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -301,33 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 3838,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -8.59375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5538,
|
||||
"logprob": -10.921875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6832,
|
||||
"logprob": -0.56347656,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.5,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.609375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92529297,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94628906,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.9042969,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.609375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92529297,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94628906,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.609375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92529297,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94628906,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.9042969,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.6054688,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92089844,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94433594,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.90625,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.6054688,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92089844,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94433594,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.90625,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.6054688,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.92089844,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -10.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -0.94433594,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.90625,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.69140625,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.32226562,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 235336,
|
||||
"logprob": -0.33203125,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.69140625,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.32226562,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.69140625,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.32226562,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 235336,
|
||||
"logprob": -0.33203125,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.71484375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.30859375,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 235336,
|
||||
"logprob": -0.3359375,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.71484375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.30859375,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 235336,
|
||||
"logprob": -0.3359375,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 1841,
|
||||
"logprob": -5.46875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.71484375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5271,
|
||||
"logprob": -12.0,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6044,
|
||||
"logprob": -0.30859375,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 235336,
|
||||
"logprob": -0.3359375,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5390625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.86035156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4912109,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.1152344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5390625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.86035156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4912109,
|
||||
"text": " learning"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5390625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.86035156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4912109,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.1152344,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5351562,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.85791016,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4882812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.1210938,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5351562,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.85791016,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4882812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.1210938,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -7.5351562,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.85791016,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -8.828125,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.4882812,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -2.1210938,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Both an elephant and a mouse are mammals. However, the differences between elephants and mice are:\n\n1",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1732541189,
|
||||
"id": "",
|
||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 30,
|
||||
"prompt_tokens": 49,
|
||||
"total_tokens": 79
|
||||
}
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
{
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "length",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": " the royal mouse? It is a little more slender and only weighs around 1.5 pounds for males and 1.3 pounds",
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
],
|
||||
"created": 1732541190,
|
||||
"id": "",
|
||||
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
||||
"object": "chat.completion",
|
||||
"system_fingerprint": "2.4.1-dev0-native",
|
||||
"usage": {
|
||||
"completion_tokens": 30,
|
||||
"prompt_tokens": 73,
|
||||
"total_tokens": 103
|
||||
}
|
||||
}
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.703125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4765625,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8583984,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7548828,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -9.0859375,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -10.90625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -2.65625,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -4.8085938,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.703125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4765625,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8652344,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7548828,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.703125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4765625,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8583984,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7548828,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.703125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4765625,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8652344,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7548828,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.703125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4765625,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8652344,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7548828,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.6914062,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4746094,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8623047,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7558594,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.6914062,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4746094,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8623047,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7558594,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.6914062,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4746094,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8623047,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7558594,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.6914062,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4746094,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8623047,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7558594,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -7.6914062,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.4746094,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.390625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.8623047,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.7558594,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "eos_token",
|
||||
"generated_tokens": 4,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 100000,
|
||||
"logprob": null,
|
||||
"text": "<|begin▁of▁sentence|>"
|
||||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3102,
|
||||
"logprob": -11.25,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,313 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 50,
|
||||
"logprob": null,
|
||||
"text": "G"
|
||||
},
|
||||
{
|
||||
"id": 330,
|
||||
"logprob": -5.96875,
|
||||
"text": "ir"
|
||||
},
|
||||
{
|
||||
"id": 1622,
|
||||
"logprob": -5.6132812,
|
||||
"text": "af"
|
||||
},
|
||||
{
|
||||
"id": 249,
|
||||
"logprob": -6.5039062,
|
||||
"text": "at"
|
||||
},
|
||||
{
|
||||
"id": 1480,
|
||||
"logprob": -8.078125,
|
||||
"text": "ron"
|
||||
},
|
||||
{
|
||||
"id": 304,
|
||||
"logprob": -2.3261719,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 23866,
|
||||
"logprob": -9.59375,
|
||||
"text": " obsessed"
|
||||
},
|
||||
{
|
||||
"id": 335,
|
||||
"logprob": -0.048339844,
|
||||
"text": " with"
|
||||
},
|
||||
{
|
||||
"id": 26680,
|
||||
"logprob": -4.0,
|
||||
"text": " gir"
|
||||
},
|
||||
{
|
||||
"id": 1903,
|
||||
"logprob": -0.07556152,
|
||||
"text": "aff"
|
||||
},
|
||||
{
|
||||
"id": 255,
|
||||
"logprob": -0.0067749023,
|
||||
"text": "es"
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"logprob": -1.546875,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 248,
|
||||
"logprob": -4.3320312,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 758,
|
||||
"logprob": -3.734375,
|
||||
"text": " most"
|
||||
},
|
||||
{
|
||||
"id": 21735,
|
||||
"logprob": -5.109375,
|
||||
"text": " glorious"
|
||||
},
|
||||
{
|
||||
"id": 5985,
|
||||
"logprob": -2.09375,
|
||||
"text": " animal"
|
||||
},
|
||||
{
|
||||
"id": 313,
|
||||
"logprob": -1.1835938,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 248,
|
||||
"logprob": -0.77685547,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1936,
|
||||
"logprob": -2.3828125,
|
||||
"text": " face"
|
||||
},
|
||||
{
|
||||
"id": 275,
|
||||
"logprob": -0.004432678,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 414,
|
||||
"logprob": -1.9677734,
|
||||
"text": " this"
|
||||
},
|
||||
{
|
||||
"id": 6490,
|
||||
"logprob": -2.046875,
|
||||
"text": " Earth"
|
||||
},
|
||||
{
|
||||
"id": 25,
|
||||
"logprob": -0.28198242,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 401,
|
||||
"logprob": -7.9179688,
|
||||
"text": " G"
|
||||
},
|
||||
{
|
||||
"id": 6013,
|
||||
"logprob": -2.2753906,
|
||||
"text": "ira"
|
||||
},
|
||||
{
|
||||
"id": 694,
|
||||
"logprob": -0.6230469,
|
||||
"text": "ft"
|
||||
},
|
||||
{
|
||||
"id": 1480,
|
||||
"logprob": -0.20874023,
|
||||
"text": "ron"
|
||||
},
|
||||
{
|
||||
"id": 9369,
|
||||
"logprob": -4.5507812,
|
||||
"text": " believes"
|
||||
},
|
||||
{
|
||||
"id": 455,
|
||||
"logprob": -4.5664062,
|
||||
"text": " all"
|
||||
},
|
||||
{
|
||||
"id": 599,
|
||||
"logprob": -2.7402344,
|
||||
"text": " other"
|
||||
},
|
||||
{
|
||||
"id": 5632,
|
||||
"logprob": -0.21948242,
|
||||
"text": " animals"
|
||||
},
|
||||
{
|
||||
"id": 362,
|
||||
"logprob": -0.7675781,
|
||||
"text": " are"
|
||||
},
|
||||
{
|
||||
"id": 23981,
|
||||
"logprob": -5.0,
|
||||
"text": " irrelevant"
|
||||
},
|
||||
{
|
||||
"id": 635,
|
||||
"logprob": -4.234375,
|
||||
"text": " when"
|
||||
},
|
||||
{
|
||||
"id": 4354,
|
||||
"logprob": -0.5131836,
|
||||
"text": " compared"
|
||||
},
|
||||
{
|
||||
"id": 271,
|
||||
"logprob": -0.103637695,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 248,
|
||||
"logprob": -0.58447266,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 21735,
|
||||
"logprob": -3.6835938,
|
||||
"text": " glorious"
|
||||
},
|
||||
{
|
||||
"id": 64398,
|
||||
"logprob": -1.8173828,
|
||||
"text": " majesty"
|
||||
},
|
||||
{
|
||||
"id": 275,
|
||||
"logprob": -0.23510742,
|
||||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 248,
|
||||
"logprob": -0.35473633,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26680,
|
||||
"logprob": -0.24633789,
|
||||
"text": " gir"
|
||||
},
|
||||
{
|
||||
"id": 23226,
|
||||
"logprob": -0.02960205,
|
||||
"text": "affe"
|
||||
},
|
||||
{
|
||||
"id": 25,
|
||||
"logprob": -0.17333984,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 193,
|
||||
"logprob": -1.3935547,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 23626,
|
||||
"logprob": -10.0625,
|
||||
"text": "Daniel"
|
||||
},
|
||||
{
|
||||
"id": 37,
|
||||
"logprob": -4.59375,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 23090,
|
||||
"logprob": -6.9375,
|
||||
"text": " Hello"
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"logprob": -0.99365234,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 29033,
|
||||
"logprob": -2.2324219,
|
||||
"text": " Gir"
|
||||
},
|
||||
{
|
||||
"id": 1622,
|
||||
"logprob": -0.10809326,
|
||||
"text": "af"
|
||||
},
|
||||
{
|
||||
"id": 249,
|
||||
"logprob": -0.042663574,
|
||||
"text": "at"
|
||||
},
|
||||
{
|
||||
"id": 1480,
|
||||
"logprob": -0.0024776459,
|
||||
"text": "ron"
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"logprob": -1.4277344,
|
||||
"text": "!"
|
||||
},
|
||||
{
|
||||
"id": 193,
|
||||
"logprob": -1.1015625,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 50,
|
||||
"logprob": -0.05709839,
|
||||
"text": "G"
|
||||
},
|
||||
{
|
||||
"id": 330,
|
||||
"logprob": -0.13208008,
|
||||
"text": "ir"
|
||||
},
|
||||
{
|
||||
"id": 1622,
|
||||
"logprob": -0.0071487427,
|
||||
"text": "af"
|
||||
},
|
||||
{
|
||||
"id": 249,
|
||||
"logprob": -0.008468628,
|
||||
"text": "at"
|
||||
},
|
||||
{
|
||||
"id": 1480,
|
||||
"logprob": -0.00068998337,
|
||||
"text": "ron"
|
||||
},
|
||||
{
|
||||
"id": 37,
|
||||
"logprob": -0.0074691772,
|
||||
"text": ":"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 330,
|
||||
"logprob": null,
|
||||
"text": "ir"
|
||||
},
|
||||
{
|
||||
"id": 1622,
|
||||
"logprob": -7.8125,
|
||||
"text": "af"
|
||||
},
|
||||
{
|
||||
"id": 249,
|
||||
"logprob": -4.5,
|
||||
"text": "at"
|
||||
},
|
||||
{
|
||||
"id": 1480,
|
||||
"logprob": -10.875,
|
||||
"text": "ron"
|
||||
},
|
||||
{
|
||||
"id": 37,
|
||||
"logprob": -3.6875,
|
||||
"text": ":"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -11.0,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -10.0625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -11.0,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,188 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -47.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 1645,
|
||||
"logprob": -18.875,
|
||||
"text": "user"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -7.15625,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -4.78125,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 5559,
|
||||
"logprob": -10.0,
|
||||
"text": "Write"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.1171875,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 19592,
|
||||
"logprob": -2.46875,
|
||||
"text": " poem"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -5.84375,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 1707,
|
||||
"logprob": -6.375,
|
||||
"text": " help"
|
||||
},
|
||||
{
|
||||
"id": 682,
|
||||
"logprob": -2.125,
|
||||
"text": " me"
|
||||
},
|
||||
{
|
||||
"id": 5434,
|
||||
"logprob": -1.546875,
|
||||
"text": " remember"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.62890625,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1370,
|
||||
"logprob": -6.65625,
|
||||
"text": " first"
|
||||
},
|
||||
{
|
||||
"id": 235248,
|
||||
"logprob": -1.84375,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 235274,
|
||||
"logprob": -0.45117188,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 235276,
|
||||
"logprob": -0.07421875,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 6635,
|
||||
"logprob": -2.109375,
|
||||
"text": " elements"
|
||||
},
|
||||
{
|
||||
"id": 611,
|
||||
"logprob": -0.4140625,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.0009536743,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26163,
|
||||
"logprob": -0.033203125,
|
||||
"text": " periodic"
|
||||
},
|
||||
{
|
||||
"id": 3037,
|
||||
"logprob": -0.0002670288,
|
||||
"text": " table"
|
||||
},
|
||||
{
|
||||
"id": 235269,
|
||||
"logprob": -4.75,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 7385,
|
||||
"logprob": -11.625,
|
||||
"text": " giving"
|
||||
},
|
||||
{
|
||||
"id": 1853,
|
||||
"logprob": -4.875,
|
||||
"text": " each"
|
||||
},
|
||||
{
|
||||
"id": 5356,
|
||||
"logprob": -0.38867188,
|
||||
"text": " element"
|
||||
},
|
||||
{
|
||||
"id": 1277,
|
||||
"logprob": -3.65625,
|
||||
"text": " its"
|
||||
},
|
||||
{
|
||||
"id": 1997,
|
||||
"logprob": -4.4375,
|
||||
"text": " own"
|
||||
},
|
||||
{
|
||||
"id": 2017,
|
||||
"logprob": -0.29882812,
|
||||
"text": " line"
|
||||
},
|
||||
{
|
||||
"id": 235265,
|
||||
"logprob": -0.16699219,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 107,
|
||||
"logprob": -25.625,
|
||||
"text": "<end_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -6.75,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -39.5,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 2516,
|
||||
"logprob": -32.5,
|
||||
"text": "model"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -10.125,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -3.421875,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,188 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -47.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 1645,
|
||||
"logprob": -18.875,
|
||||
"text": "user"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -7.25,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -4.78125,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 5559,
|
||||
"logprob": -10.0,
|
||||
"text": "Write"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.111816406,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 19592,
|
||||
"logprob": -2.46875,
|
||||
"text": " poem"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -5.78125,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 1707,
|
||||
"logprob": -6.375,
|
||||
"text": " help"
|
||||
},
|
||||
{
|
||||
"id": 682,
|
||||
"logprob": -2.125,
|
||||
"text": " me"
|
||||
},
|
||||
{
|
||||
"id": 5434,
|
||||
"logprob": -1.59375,
|
||||
"text": " remember"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.62890625,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1370,
|
||||
"logprob": -6.625,
|
||||
"text": " first"
|
||||
},
|
||||
{
|
||||
"id": 235248,
|
||||
"logprob": -1.7421875,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 235274,
|
||||
"logprob": -0.44921875,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 235276,
|
||||
"logprob": -0.07128906,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 6635,
|
||||
"logprob": -2.109375,
|
||||
"text": " elements"
|
||||
},
|
||||
{
|
||||
"id": 611,
|
||||
"logprob": -0.40429688,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.0009918213,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26163,
|
||||
"logprob": -0.03540039,
|
||||
"text": " periodic"
|
||||
},
|
||||
{
|
||||
"id": 3037,
|
||||
"logprob": -0.00028800964,
|
||||
"text": " table"
|
||||
},
|
||||
{
|
||||
"id": 235269,
|
||||
"logprob": -4.71875,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 7385,
|
||||
"logprob": -11.875,
|
||||
"text": " giving"
|
||||
},
|
||||
{
|
||||
"id": 1853,
|
||||
"logprob": -4.875,
|
||||
"text": " each"
|
||||
},
|
||||
{
|
||||
"id": 5356,
|
||||
"logprob": -0.38867188,
|
||||
"text": " element"
|
||||
},
|
||||
{
|
||||
"id": 1277,
|
||||
"logprob": -3.65625,
|
||||
"text": " its"
|
||||
},
|
||||
{
|
||||
"id": 1997,
|
||||
"logprob": -4.4375,
|
||||
"text": " own"
|
||||
},
|
||||
{
|
||||
"id": 2017,
|
||||
"logprob": -0.3046875,
|
||||
"text": " line"
|
||||
},
|
||||
{
|
||||
"id": 235265,
|
||||
"logprob": -0.16113281,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 107,
|
||||
"logprob": -25.625,
|
||||
"text": "<end_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -6.75,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -39.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 2516,
|
||||
"logprob": -32.5,
|
||||
"text": "model"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -10.1875,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -3.296875,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -258,188 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -47.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 1645,
|
||||
"logprob": -18.875,
|
||||
"text": "user"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -7.25,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -4.78125,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 5559,
|
||||
"logprob": -10.0,
|
||||
"text": "Write"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.111816406,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 19592,
|
||||
"logprob": -2.46875,
|
||||
"text": " poem"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -5.78125,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 1707,
|
||||
"logprob": -6.375,
|
||||
"text": " help"
|
||||
},
|
||||
{
|
||||
"id": 682,
|
||||
"logprob": -2.125,
|
||||
"text": " me"
|
||||
},
|
||||
{
|
||||
"id": 5434,
|
||||
"logprob": -1.59375,
|
||||
"text": " remember"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.62890625,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1370,
|
||||
"logprob": -6.625,
|
||||
"text": " first"
|
||||
},
|
||||
{
|
||||
"id": 235248,
|
||||
"logprob": -1.7421875,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 235274,
|
||||
"logprob": -0.44921875,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 235276,
|
||||
"logprob": -0.07128906,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 6635,
|
||||
"logprob": -2.109375,
|
||||
"text": " elements"
|
||||
},
|
||||
{
|
||||
"id": 611,
|
||||
"logprob": -0.40429688,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.0009918213,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26163,
|
||||
"logprob": -0.03540039,
|
||||
"text": " periodic"
|
||||
},
|
||||
{
|
||||
"id": 3037,
|
||||
"logprob": -0.00028800964,
|
||||
"text": " table"
|
||||
},
|
||||
{
|
||||
"id": 235269,
|
||||
"logprob": -4.71875,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 7385,
|
||||
"logprob": -11.875,
|
||||
"text": " giving"
|
||||
},
|
||||
{
|
||||
"id": 1853,
|
||||
"logprob": -4.875,
|
||||
"text": " each"
|
||||
},
|
||||
{
|
||||
"id": 5356,
|
||||
"logprob": -0.38867188,
|
||||
"text": " element"
|
||||
},
|
||||
{
|
||||
"id": 1277,
|
||||
"logprob": -3.65625,
|
||||
"text": " its"
|
||||
},
|
||||
{
|
||||
"id": 1997,
|
||||
"logprob": -4.4375,
|
||||
"text": " own"
|
||||
},
|
||||
{
|
||||
"id": 2017,
|
||||
"logprob": -0.3046875,
|
||||
"text": " line"
|
||||
},
|
||||
{
|
||||
"id": 235265,
|
||||
"logprob": -0.16113281,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 107,
|
||||
"logprob": -25.625,
|
||||
"text": "<end_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -6.75,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -39.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 2516,
|
||||
"logprob": -32.5,
|
||||
"text": "model"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -10.1875,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -3.296875,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -512,188 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -47.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 1645,
|
||||
"logprob": -18.875,
|
||||
"text": "user"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -7.15625,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -4.78125,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 5559,
|
||||
"logprob": -10.0,
|
||||
"text": "Write"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.1171875,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 19592,
|
||||
"logprob": -2.46875,
|
||||
"text": " poem"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -5.84375,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 1707,
|
||||
"logprob": -6.375,
|
||||
"text": " help"
|
||||
},
|
||||
{
|
||||
"id": 682,
|
||||
"logprob": -2.125,
|
||||
"text": " me"
|
||||
},
|
||||
{
|
||||
"id": 5434,
|
||||
"logprob": -1.546875,
|
||||
"text": " remember"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.62890625,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1370,
|
||||
"logprob": -6.65625,
|
||||
"text": " first"
|
||||
},
|
||||
{
|
||||
"id": 235248,
|
||||
"logprob": -1.84375,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 235274,
|
||||
"logprob": -0.45117188,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 235276,
|
||||
"logprob": -0.07421875,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 6635,
|
||||
"logprob": -2.109375,
|
||||
"text": " elements"
|
||||
},
|
||||
{
|
||||
"id": 611,
|
||||
"logprob": -0.4140625,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.0009536743,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26163,
|
||||
"logprob": -0.033203125,
|
||||
"text": " periodic"
|
||||
},
|
||||
{
|
||||
"id": 3037,
|
||||
"logprob": -0.0002670288,
|
||||
"text": " table"
|
||||
},
|
||||
{
|
||||
"id": 235269,
|
||||
"logprob": -4.75,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 7385,
|
||||
"logprob": -11.625,
|
||||
"text": " giving"
|
||||
},
|
||||
{
|
||||
"id": 1853,
|
||||
"logprob": -4.875,
|
||||
"text": " each"
|
||||
},
|
||||
{
|
||||
"id": 5356,
|
||||
"logprob": -0.38867188,
|
||||
"text": " element"
|
||||
},
|
||||
{
|
||||
"id": 1277,
|
||||
"logprob": -3.65625,
|
||||
"text": " its"
|
||||
},
|
||||
{
|
||||
"id": 1997,
|
||||
"logprob": -4.4375,
|
||||
"text": " own"
|
||||
},
|
||||
{
|
||||
"id": 2017,
|
||||
"logprob": -0.29882812,
|
||||
"text": " line"
|
||||
},
|
||||
{
|
||||
"id": 235265,
|
||||
"logprob": -0.16699219,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 107,
|
||||
"logprob": -25.625,
|
||||
"text": "<end_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -6.75,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -39.5,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 2516,
|
||||
"logprob": -32.5,
|
||||
"text": "model"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -10.125,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -3.421875,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -766,188 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -47.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 1645,
|
||||
"logprob": -18.875,
|
||||
"text": "user"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -7.25,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -4.78125,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 5559,
|
||||
"logprob": -10.0,
|
||||
"text": "Write"
|
||||
},
|
||||
{
|
||||
"id": 476,
|
||||
"logprob": -0.111816406,
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 19592,
|
||||
"logprob": -2.46875,
|
||||
"text": " poem"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -5.78125,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 1707,
|
||||
"logprob": -6.375,
|
||||
"text": " help"
|
||||
},
|
||||
{
|
||||
"id": 682,
|
||||
"logprob": -2.125,
|
||||
"text": " me"
|
||||
},
|
||||
{
|
||||
"id": 5434,
|
||||
"logprob": -1.59375,
|
||||
"text": " remember"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.62890625,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 1370,
|
||||
"logprob": -6.625,
|
||||
"text": " first"
|
||||
},
|
||||
{
|
||||
"id": 235248,
|
||||
"logprob": -1.7421875,
|
||||
"text": " "
|
||||
},
|
||||
{
|
||||
"id": 235274,
|
||||
"logprob": -0.44921875,
|
||||
"text": "1"
|
||||
},
|
||||
{
|
||||
"id": 235276,
|
||||
"logprob": -0.07128906,
|
||||
"text": "0"
|
||||
},
|
||||
{
|
||||
"id": 6635,
|
||||
"logprob": -2.109375,
|
||||
"text": " elements"
|
||||
},
|
||||
{
|
||||
"id": 611,
|
||||
"logprob": -0.40429688,
|
||||
"text": " on"
|
||||
},
|
||||
{
|
||||
"id": 573,
|
||||
"logprob": -0.0009918213,
|
||||
"text": " the"
|
||||
},
|
||||
{
|
||||
"id": 26163,
|
||||
"logprob": -0.03540039,
|
||||
"text": " periodic"
|
||||
},
|
||||
{
|
||||
"id": 3037,
|
||||
"logprob": -0.00028800964,
|
||||
"text": " table"
|
||||
},
|
||||
{
|
||||
"id": 235269,
|
||||
"logprob": -4.71875,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 7385,
|
||||
"logprob": -11.875,
|
||||
"text": " giving"
|
||||
},
|
||||
{
|
||||
"id": 1853,
|
||||
"logprob": -4.875,
|
||||
"text": " each"
|
||||
},
|
||||
{
|
||||
"id": 5356,
|
||||
"logprob": -0.38867188,
|
||||
"text": " element"
|
||||
},
|
||||
{
|
||||
"id": 1277,
|
||||
"logprob": -3.65625,
|
||||
"text": " its"
|
||||
},
|
||||
{
|
||||
"id": 1997,
|
||||
"logprob": -4.4375,
|
||||
"text": " own"
|
||||
},
|
||||
{
|
||||
"id": 2017,
|
||||
"logprob": -0.3046875,
|
||||
"text": " line"
|
||||
},
|
||||
{
|
||||
"id": 235265,
|
||||
"logprob": -0.16113281,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 107,
|
||||
"logprob": -25.625,
|
||||
"text": "<end_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -6.75,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 106,
|
||||
"logprob": -39.25,
|
||||
"text": "<start_of_turn>"
|
||||
},
|
||||
{
|
||||
"id": 2516,
|
||||
"logprob": -32.5,
|
||||
"text": "model"
|
||||
},
|
||||
{
|
||||
"id": 235292,
|
||||
"logprob": -10.1875,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 108,
|
||||
"logprob": -3.296875,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.640625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.6484375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.3671875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.6484375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.359375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.6484375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.640625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.3671875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2,
|
||||
"logprob": null,
|
||||
"text": "<bos>"
|
||||
},
|
||||
{
|
||||
"id": 2015,
|
||||
"logprob": -9.6484375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -10.359375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2061,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"logprob": -3.1835938,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 2769,
|
||||
"logprob": -9.171875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 4673,
|
||||
"logprob": -1.6425781,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -0.7314453,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,33 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2061,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"logprob": -3.1835938,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 2769,
|
||||
"logprob": -9.171875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 4673,
|
||||
"logprob": -1.6425781,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -0.7314453,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -103,33 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2061,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"logprob": -3.1660156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 2769,
|
||||
"logprob": -9.1796875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 4673,
|
||||
"logprob": -1.6376953,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -0.72216797,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -202,33 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2061,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"logprob": -3.1660156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 2769,
|
||||
"logprob": -9.1796875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 4673,
|
||||
"logprob": -1.6376953,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -0.72216797,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -301,33 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2061,
|
||||
"logprob": null,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 318,
|
||||
"logprob": -3.1660156,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 2769,
|
||||
"logprob": -9.1796875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 4673,
|
||||
"logprob": -1.6376953,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -0.72216797,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -13.90625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -12.328125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,88 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "eos_token",
|
||||
"generated_tokens": 30,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 5235,
|
||||
"logprob": -10.0625,
|
||||
"text": "info"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.2324219,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 13260,
|
||||
"logprob": -10.625,
|
||||
"text": "dav"
|
||||
},
|
||||
{
|
||||
"id": 333,
|
||||
"logprob": -0.08276367,
|
||||
"text": "id"
|
||||
},
|
||||
{
|
||||
"id": 8753,
|
||||
"logprob": -7.5273438,
|
||||
"text": "hol"
|
||||
},
|
||||
{
|
||||
"id": 17559,
|
||||
"logprob": -3.8476562,
|
||||
"text": "tz"
|
||||
},
|
||||
{
|
||||
"id": 763,
|
||||
"logprob": -10.140625,
|
||||
"text": "like"
|
||||
},
|
||||
{
|
||||
"id": 10697,
|
||||
"logprob": -10.1953125,
|
||||
"text": "trees"
|
||||
},
|
||||
{
|
||||
"id": 322,
|
||||
"logprob": -2.5742188,
|
||||
"text": "and"
|
||||
},
|
||||
{
|
||||
"id": 756,
|
||||
"logprob": -7.4882812,
|
||||
"text": "has"
|
||||
},
|
||||
{
|
||||
"id": 1023,
|
||||
"logprob": -5.0507812,
|
||||
"text": "two"
|
||||
},
|
||||
{
|
||||
"id": 274,
|
||||
"logprob": -5.3164062,
|
||||
"text": "c"
|
||||
},
|
||||
{
|
||||
"id": 1446,
|
||||
"logprob": -0.6694336,
|
||||
"text": "ats"
|
||||
},
|
||||
{
|
||||
"id": 29889,
|
||||
"logprob": -0.9995117,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 29871,
|
||||
"logprob": -4.2421875,
|
||||
"text": ""
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,53 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1024,
|
||||
"logprob": -10.578125,
|
||||
"text": "name"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.0332031,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 13260,
|
||||
"logprob": -9.171875,
|
||||
"text": "dav"
|
||||
},
|
||||
{
|
||||
"id": 333,
|
||||
"logprob": -0.04257202,
|
||||
"text": "id"
|
||||
},
|
||||
{
|
||||
"id": 29889,
|
||||
"logprob": -2.4785156,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 4876,
|
||||
"logprob": -10.7890625,
|
||||
"text": "email"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.32495117,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 259,
|
||||
"logprob": -9.4921875,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -123,53 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1024,
|
||||
"logprob": -10.578125,
|
||||
"text": "name"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.03125,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 13260,
|
||||
"logprob": -9.171875,
|
||||
"text": "dav"
|
||||
},
|
||||
{
|
||||
"id": 333,
|
||||
"logprob": -0.04244995,
|
||||
"text": "id"
|
||||
},
|
||||
{
|
||||
"id": 29889,
|
||||
"logprob": -2.4863281,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 4876,
|
||||
"logprob": -10.7890625,
|
||||
"text": "email"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.32714844,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 259,
|
||||
"logprob": -9.4921875,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -242,53 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1024,
|
||||
"logprob": -10.578125,
|
||||
"text": "name"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.0332031,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 13260,
|
||||
"logprob": -9.171875,
|
||||
"text": "dav"
|
||||
},
|
||||
{
|
||||
"id": 333,
|
||||
"logprob": -0.04257202,
|
||||
"text": "id"
|
||||
},
|
||||
{
|
||||
"id": 29889,
|
||||
"logprob": -2.4785156,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 4876,
|
||||
"logprob": -10.7890625,
|
||||
"text": "email"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.32495117,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 259,
|
||||
"logprob": -9.4921875,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -361,53 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1024,
|
||||
"logprob": -10.578125,
|
||||
"text": "name"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -3.0332031,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 13260,
|
||||
"logprob": -9.171875,
|
||||
"text": "dav"
|
||||
},
|
||||
{
|
||||
"id": 333,
|
||||
"logprob": -0.04257202,
|
||||
"text": "id"
|
||||
},
|
||||
{
|
||||
"id": 29889,
|
||||
"logprob": -2.4785156,
|
||||
"text": "."
|
||||
},
|
||||
{
|
||||
"id": 4876,
|
||||
"logprob": -10.7890625,
|
||||
"text": "email"
|
||||
},
|
||||
{
|
||||
"id": 29901,
|
||||
"logprob": -0.32495117,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 259,
|
||||
"logprob": -9.4921875,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,43 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 806,
|
||||
"logprob": -11.890625,
|
||||
"text": "Wh"
|
||||
},
|
||||
{
|
||||
"id": 1446,
|
||||
"logprob": -3.6699219,
|
||||
"text": "ats"
|
||||
},
|
||||
{
|
||||
"id": 2921,
|
||||
"logprob": -7.8203125,
|
||||
"text": "Go"
|
||||
},
|
||||
{
|
||||
"id": 468,
|
||||
"logprob": -8.0703125,
|
||||
"text": "og"
|
||||
},
|
||||
{
|
||||
"id": 793,
|
||||
"logprob": -2.1875,
|
||||
"text": "les"
|
||||
},
|
||||
{
|
||||
"id": 16332,
|
||||
"logprob": -9.7109375,
|
||||
"text": "DNS"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "stop_sequence",
|
||||
"generated_tokens": 5,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -8.6875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.546875,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,18 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.4375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,18 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.453125,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,18 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.453125,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -88,18 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.40625,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -172,18 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.421875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -256,18 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.4140625,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.421875,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.546875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.5234375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.421875,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.5625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.5625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.5625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": -9.5625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -10.375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.1875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.93359375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.1796875,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.75,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -18.0,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -11.8359375,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -2.0703125,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -5.9765625,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.1875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.93359375,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.875,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.1796875,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.75,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -108,38 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.21875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.95703125,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.9375,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.1328125,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.75,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -212,38 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.21875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.95703125,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.9375,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.1328125,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.75,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -316,38 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 128000,
|
||||
"logprob": null,
|
||||
"text": "<|begin_of_text|>"
|
||||
},
|
||||
{
|
||||
"id": 3923,
|
||||
"logprob": -6.21875,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 374,
|
||||
"logprob": -0.95703125,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 5655,
|
||||
"logprob": -9.9375,
|
||||
"text": " deep"
|
||||
},
|
||||
{
|
||||
"id": 6975,
|
||||
"logprob": -1.1328125,
|
||||
"text": " learning"
|
||||
},
|
||||
{
|
||||
"id": 30,
|
||||
"logprob": -1.75,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,18 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,18 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,18 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -88,18 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -172,18 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -256,18 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 2323,
|
||||
"logprob": null,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 1715,
|
||||
"logprob": -11.34375,
|
||||
"text": " request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -12.390625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -11.0625,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 4321,
|
||||
"logprob": -9.0859375,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2009,
|
||||
"logprob": -16.359375,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -10.0078125,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -15.515625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -2.8847656,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -4.140625,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,38 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -10.734375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.5488281,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.2890625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.2753906,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.48046875,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -107,38 +76,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -10.734375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.5488281,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.2890625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.2724609,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.47729492,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -210,38 +148,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -10.734375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.5488281,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.2890625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.2724609,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.47729492,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -313,38 +220,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -10.734375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.5488281,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.2890625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.2724609,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.47729492,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,38 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1724,
|
||||
"logprob": -10.734375,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 338,
|
||||
"logprob": -1.5488281,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21784,
|
||||
"logprob": -9.2890625,
|
||||
"text": "Deep"
|
||||
},
|
||||
{
|
||||
"id": 29257,
|
||||
"logprob": -1.2753906,
|
||||
"text": "Learning"
|
||||
},
|
||||
{
|
||||
"id": 29973,
|
||||
"logprob": -0.48046875,
|
||||
"text": "?"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,23 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,23 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -93,23 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -182,23 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -271,23 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 3735,
|
||||
"logprob": -12.9140625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
"id": 2159,
|
||||
"logprob": -10.7578125,
|
||||
"text": "request"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,48 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1824,
|
||||
"logprob": -6.1445312,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 349,
|
||||
"logprob": -1.4648438,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21135,
|
||||
"logprob": -13.6875,
|
||||
"text": "gradient"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -1.6005859,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.39526367,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.640625,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.18774414,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -3,33 +3,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -17.234375,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -7.4375,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.8046875,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.33032227,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": 0,
|
||||
"tokens": [
|
||||
{
|
||||
|
@ -4,48 +4,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1824,
|
||||
"logprob": -6.1445312,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 349,
|
||||
"logprob": -1.4648438,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21135,
|
||||
"logprob": -13.6875,
|
||||
"text": "gradient"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -1.6005859,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.39526367,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.640625,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.18774414,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -118,48 +77,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1824,
|
||||
"logprob": -6.1445312,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 349,
|
||||
"logprob": -1.4677734,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21135,
|
||||
"logprob": -13.6875,
|
||||
"text": "gradient"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -1.6015625,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.39453125,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.6435547,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.18713379,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -232,48 +150,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1824,
|
||||
"logprob": -6.140625,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 349,
|
||||
"logprob": -1.4658203,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21135,
|
||||
"logprob": -13.6796875,
|
||||
"text": "gradient"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -1.5898438,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.3955078,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.64501953,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.18493652,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
@ -346,48 +223,7 @@
|
||||
"best_of_sequences": null,
|
||||
"finish_reason": "length",
|
||||
"generated_tokens": 10,
|
||||
"prefill": [
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": null,
|
||||
"text": "<s>"
|
||||
},
|
||||
{
|
||||
"id": 1824,
|
||||
"logprob": -6.1328125,
|
||||
"text": "What"
|
||||
},
|
||||
{
|
||||
"id": 349,
|
||||
"logprob": -1.4658203,
|
||||
"text": "is"
|
||||
},
|
||||
{
|
||||
"id": 21135,
|
||||
"logprob": -13.6796875,
|
||||
"text": "gradient"
|
||||
},
|
||||
{
|
||||
"id": 24871,
|
||||
"logprob": -1.5947266,
|
||||
"text": "descent"
|
||||
},
|
||||
{
|
||||
"id": 28804,
|
||||
"logprob": -0.39648438,
|
||||
"text": "?"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.6464844,
|
||||
"text": "\n"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.18688965,
|
||||
"text": "\n"
|
||||
}
|
||||
],
|
||||
"prefill": [],
|
||||
"seed": null,
|
||||
"tokens": [
|
||||
{
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user