Mirror of https://github.com/huggingface/text-generation-inference.git
Synced 2025-06-19 15:52:08 +00:00

fix: bump utoipa, openapi doc version and improve test

Parent: ac50b14afb
Commit: bddcf9be6c

Cargo.lock (generated): 132 lines changed
@@ -128,6 +128,9 @@ name = "arbitrary"
 version = "1.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
+dependencies = [
+ "derive_arbitrary",
+]
 
 [[package]]
 name = "arc-swap"
@@ -305,7 +308,7 @@ dependencies = [
  "http-body 0.4.6",
  "hyper 0.14.32",
  "itoa",
- "matchit",
+ "matchit 0.7.3",
  "memchr",
  "mime",
  "percent-encoding",
@@ -338,7 +341,41 @@ dependencies = [
  "hyper 1.6.0",
  "hyper-util",
  "itoa",
- "matchit",
+ "matchit 0.7.3",
  "memchr",
  "mime",
  "percent-encoding",
+ "pin-project-lite",
+ "rustversion",
+ "serde",
+ "serde_json",
+ "serde_path_to_error",
+ "serde_urlencoded",
+ "sync_wrapper 1.0.2",
+ "tokio",
+ "tower 0.5.2",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "axum"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d6fd624c75e18b3b4c6b9caf42b1afe24437daaee904069137d8bab077be8b8"
+dependencies = [
+ "axum-core 0.5.0",
+ "bytes",
+ "form_urlencoded",
+ "futures-util",
+ "http 1.2.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "hyper 1.6.0",
+ "hyper-util",
+ "itoa",
+ "matchit 0.8.4",
+ "memchr",
+ "mime",
+ "percent-encoding",
@@ -394,6 +431,26 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "axum-core"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df1362f362fd16024ae199c1970ce98f9661bf5ef94b9808fee734bc3698b733"
+dependencies = [
+ "bytes",
+ "futures-util",
+ "http 1.2.0",
+ "http-body 1.0.1",
+ "http-body-util",
+ "mime",
+ "pin-project-lite",
+ "rustversion",
+ "sync_wrapper 1.0.2",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+]
+
 [[package]]
 name = "axum-tracing-opentelemetry"
 version = "0.16.0"
@@ -1108,6 +1165,17 @@ dependencies = [
  "powerfmt",
 ]
 
+[[package]]
+name = "derive_arbitrary"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.98",
+]
+
 [[package]]
 name = "derive_builder"
 version = "0.20.2"
@@ -2387,6 +2455,12 @@ dependencies = [
  "scopeguard",
 ]
 
+[[package]]
+name = "lockfree-object-pool"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
+
 [[package]]
 name = "log"
 version = "0.4.25"
@@ -2448,6 +2522,12 @@ version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 
+[[package]]
+name = "matchit"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3"
+
 [[package]]
 name = "maybe-rayon"
 version = "0.1.1"
@@ -4704,7 +4784,7 @@ dependencies = [
  "anyhow",
  "async-stream",
  "async-trait",
- "axum 0.7.9",
+ "axum 0.8.1",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
  "chrono",
@@ -4772,7 +4852,7 @@ version = "3.1.1-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
- "axum 0.7.9",
+ "axum 0.8.1",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
  "clap 4.5.30",
@@ -4821,7 +4901,7 @@ version = "3.1.1-dev0"
 dependencies = [
  "async-stream",
  "async-trait",
- "axum 0.7.9",
+ "axum 0.8.1",
  "axum-tracing-opentelemetry",
  "base64 0.22.1",
  "clap 4.5.30",
@@ -5559,9 +5639,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
 [[package]]
 name = "utoipa"
-version = "4.2.3"
+version = "5.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23"
+checksum = "435c6f69ef38c9017b4b4eea965dfb91e71e53d869e896db40d1cf2441dd75c0"
 dependencies = [
  "indexmap 2.7.1",
  "serde",
@@ -5571,11 +5651,10 @@ dependencies = [
 
 [[package]]
 name = "utoipa-gen"
-version = "4.3.1"
+version = "5.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392"
+checksum = "a77d306bc75294fd52f3e99b13ece67c02c1a2789190a6f31d32f736624326f7"
 dependencies = [
- "proc-macro-error",
  "proc-macro2",
  "quote",
  "regex",
@@ -5584,16 +5663,18 @@ dependencies = [
 
 [[package]]
 name = "utoipa-swagger-ui"
-version = "6.0.0"
+version = "9.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b39868d43c011961e04b41623e050aedf2cc93652562ff7935ce0f819aaf2da"
+checksum = "161166ec520c50144922a625d8bc4925cc801b2dda958ab69878527c0e5c5d61"
 dependencies = [
- "axum 0.7.9",
+ "axum 0.8.1",
+ "base64 0.22.1",
  "mime_guess",
  "regex",
  "rust-embed",
  "serde",
  "serde_json",
+ "url",
  "utoipa",
  "zip",
 ]
@@ -6323,14 +6404,33 @@ dependencies = [
 
 [[package]]
 name = "zip"
-version = "0.6.6"
+version = "2.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261"
+checksum = "ae9c1ea7b3a5e1f4b922ff856a129881167511563dc219869afe3787fc0c1a45"
dependencies = [
- "byteorder",
+ "arbitrary",
  "crc32fast",
  "crossbeam-utils",
+ "displaydoc",
+ "flate2",
+ "indexmap 2.7.1",
+ "memchr",
+ "thiserror 2.0.11",
+ "zopfli",
 ]
 
+[[package]]
+name = "zopfli"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946"
+dependencies = [
+ "bumpalo",
+ "crc32fast",
+ "lockfree-object-pool",
+ "log",
+ "once_cell",
+ "simd-adler32",
+]
+
 [[package]]
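Note on the lockfile churn above: it is the generated fallout of the manifest bumps below. utoipa moves 4.2.3 → 5.3.1 (utoipa-gen drops proc-macro-error), utoipa-swagger-ui moves 6.0.0 → 9.0.0 (pulling in zip 2.2.2, which in turn brings zopfli, displaydoc, and derive_arbitrary), and axum 0.8.1 enters the tree alongside axum 0.7.9, which appears to remain only as a transitive dependency (e.g. of axum-tracing-opentelemetry 0.16).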
@@ -16,7 +16,7 @@ path = "src/main.rs"
 [dependencies]
 async-trait = "0.1.74"
 async-stream = "0.3.5"
-axum = { version = "0.7", features = ["json"] }
+axum = { version = "0.8", features = ["json"] }
 axum-tracing-opentelemetry = "0.16"
 text-generation-router = { path = "../../router" }
 clap = { version = "4.4.5", features = ["derive", "env"] }
@@ -48,8 +48,8 @@ tower-http = { version = "0.5.1", features = ["cors"] }
 tracing = "0.1.37"
 tracing-opentelemetry = "0.21.0"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
-utoipa = { version = "4.2.0", features = ["axum_extras"] }
-utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
+utoipa = { version = "5.3.1", features = ["axum_extras"] }
+utoipa-swagger-ui = { version = "9.0.0", features = ["axum"] }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
 "opentelemetry-otlp",
 ] }

(identical changes in a second backend Cargo.toml; the extraction lost both filenames)

@@ -16,7 +16,7 @@ path = "src/main.rs"
 [dependencies]
 async-trait = "0.1.74"
 async-stream = "0.3.5"
-axum = { version = "0.7", features = ["json"] }
+axum = { version = "0.8", features = ["json"] }
 axum-tracing-opentelemetry = "0.16"
 text-generation-router = { path = "../../router" }
 clap = { version = "4.4.5", features = ["derive", "env"] }
@@ -48,8 +48,8 @@ tower-http = { version = "0.5.1", features = ["cors"] }
 tracing = "0.1.37"
 tracing-opentelemetry = "0.21.0"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
-utoipa = { version = "4.2.0", features = ["axum_extras"] }
-utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
+utoipa = { version = "5.3.1", features = ["axum_extras"] }
+utoipa-swagger-ui = { version = "9.0.0", features = ["axum"] }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
 "opentelemetry-otlp",
 ] }
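The utoipa 4 → 5 bump above is what drives the rewrite of the OpenAPI document below: utoipa 5 emits OpenAPI 3.1, where optional fields are expressed as `"type": ["T", "null"]` rather than 3.0's `"nullable": true`. A minimal sketch of the pattern (the struct and field here are illustrative, not taken from this commit):

```rust
use utoipa::ToSchema;

// Under utoipa 4.x (OpenAPI 3.0.3) this Option<u64> field was documented as
//   "type": "integer", "nullable": true
// Under utoipa 5.x (OpenAPI 3.1.0) the same derive produces
//   "type": ["integer", "null"]
#[derive(ToSchema)]
struct SamplingParams {
    /// Random sampling seed.
    #[schema(example = 42)]
    seed: Option<u64>,
}
```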
@@ -1,5 +1,5 @@
 {
-  "openapi": "3.0.3",
+  "openapi": "3.1.0",
   "info": {
     "title": "Text Generation Inference",
     "description": "Text Generation Webserver",
@@ -757,10 +757,12 @@
           }
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "example": 42,
-          "nullable": true,
           "minimum": 0
         },
         "tokens": {
@@ -829,8 +831,10 @@
           "$ref": "#/components/schemas/ChatCompletionDelta"
         },
         "finish_reason": {
-          "type": "string",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ]
         },
         "index": {
           "type": "integer",
@@ -838,12 +842,14 @@
           "minimum": 0
         },
         "logprobs": {
-          "allOf": [
+          "oneOf": [
+            {
+              "type": "null"
+            },
             {
               "$ref": "#/components/schemas/ChatCompletionLogprobs"
             }
-          ],
-          "nullable": true
+          ]
         }
       }
     },
@@ -880,12 +886,14 @@
           "type": "string"
         },
         "usage": {
-          "allOf": [
+          "oneOf": [
+            {
+              "type": "null"
+            },
             {
               "$ref": "#/components/schemas/Usage"
             }
-          ],
-          "nullable": true
+          ]
         }
       }
     },
@@ -906,12 +914,14 @@
           "minimum": 0
         },
         "logprobs": {
-          "allOf": [
+          "oneOf": [
+            {
+              "type": "null"
+            },
             {
               "$ref": "#/components/schemas/ChatCompletionLogprobs"
             }
-          ],
-          "nullable": true
+          ]
         },
         "message": {
           "$ref": "#/components/schemas/OutputMessage"
@@ -988,34 +998,42 @@
       ],
       "properties": {
         "frequency_penalty": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
-          "example": "1.0",
-          "nullable": true
+          "example": "1.0"
         },
         "logit_bias": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "type": "number",
             "format": "float"
           },
-          "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
-          "nullable": true
+          "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token."
         },
         "logprobs": {
-          "type": "boolean",
+          "type": [
+            "boolean",
+            "null"
+          ],
           "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
-          "example": "false",
-          "nullable": true
+          "example": "false"
         },
         "max_tokens": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "The maximum number of tokens that can be generated in the chat completion.",
           "default": "1024",
           "example": "32",
-          "nullable": true,
           "minimum": 0
         },
         "messages": {
@@ -1027,107 +1045,136 @@
           "example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
         },
         "model": {
-          "type": "string",
+          "type": [
+            "string",
+            "null"
+          ],
           "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
-          "example": "mistralai/Mistral-7B-Instruct-v0.2",
-          "nullable": true
+          "example": "mistralai/Mistral-7B-Instruct-v0.2"
         },
         "n": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
           "example": "2",
-          "nullable": true,
           "minimum": 0
         },
         "presence_penalty": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
-          "example": 0.1,
-          "nullable": true
+          "example": 0.1
         },
         "response_format": {
-          "allOf": [
-            {
-              "$ref": "#/components/schemas/GrammarType"
-            }
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "$ref": "#/components/schemas/GrammarType",
+              "description": "Response format constraints for the generation.\n\nNOTE: A request can use `response_format` OR `tools` but not both."
+            }
           ],
-          "default": "null",
-          "nullable": true
+          "default": "null"
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "example": 42,
-          "nullable": true,
           "minimum": 0
         },
         "stop": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "type": "string"
           },
           "description": "Up to 4 sequences where the API will stop generating further tokens.",
-          "example": "null",
-          "nullable": true
+          "example": "null"
         },
         "stream": {
           "type": "boolean"
         },
         "stream_options": {
-          "allOf": [
-            {
-              "$ref": "#/components/schemas/StreamOptions"
-            }
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "$ref": "#/components/schemas/StreamOptions",
+              "description": "Options for streaming response. Only set this when you set stream: true."
+            }
-          ],
-          "nullable": true
+          ]
         },
         "temperature": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
-          "example": 1.0,
-          "nullable": true
+          "example": 1.0
         },
         "tool_choice": {
-          "allOf": [
-            {
-              "$ref": "#/components/schemas/ToolChoice"
-            }
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "$ref": "#/components/schemas/ToolChoice",
+              "description": "A specific tool to use. If not provided, the model will default to use any of the tools provided in the tools parameter."
+            }
           ],
-          "default": "auto",
-          "nullable": true
+          "default": "auto"
         },
         "tool_prompt": {
-          "type": "string",
+          "type": [
+            "string",
+            "null"
+          ],
           "description": "A prompt to be appended before the tools",
-          "example": "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.",
-          "nullable": true
+          "example": "Given the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables."
         },
         "tools": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "$ref": "#/components/schemas/Tool"
           },
           "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
-          "example": "null",
-          "nullable": true
+          "example": "null"
         },
         "top_logprobs": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
           "example": "5",
-          "nullable": true,
           "minimum": 0
         },
         "top_p": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
-          "example": 0.95,
-          "nullable": true
+          "example": 0.95
         }
       }
     },
@@ -1241,10 +1288,7 @@
             }
           ]
         }
-      ],
-      "discriminator": {
-        "propertyName": "object"
-      }
+      ]
     },
     "CompletionComplete": {
       "type": "object",
@@ -1263,12 +1307,14 @@
           "minimum": 0
         },
         "logprobs": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "type": "number",
             "format": "float"
-          },
-          "nullable": true
+          }
         },
         "text": {
           "type": "string"
@@ -1320,72 +1366,91 @@
       ],
       "properties": {
         "frequency_penalty": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
-          "example": "1.0",
-          "nullable": true
+          "example": "1.0"
         },
         "max_tokens": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "The maximum number of tokens that can be generated in the chat completion.",
           "default": "1024",
           "example": "32",
-          "nullable": true,
           "minimum": 0
         },
         "model": {
-          "type": "string",
+          "type": [
+            "string",
+            "null"
+          ],
           "description": "UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
-          "example": "mistralai/Mistral-7B-Instruct-v0.2",
-          "nullable": true
+          "example": "mistralai/Mistral-7B-Instruct-v0.2"
        },
         "prompt": {
-          "$ref": "#/components/schemas/Prompt"
+          "$ref": "#/components/schemas/Prompt",
+          "description": "The prompt to generate completions for."
         },
         "repetition_penalty": {
-          "type": "number",
-          "format": "float",
-          "nullable": true
+          "type": [
+            "number",
+            "null"
+          ],
+          "format": "float"
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "example": 42,
-          "nullable": true,
           "minimum": 0
         },
         "stop": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "type": "string"
           },
           "description": "Up to 4 sequences where the API will stop generating further tokens.",
-          "example": "null",
-          "nullable": true
+          "example": "null"
         },
         "stream": {
           "type": "boolean"
         },
         "suffix": {
-          "type": "string",
-          "description": "The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.\nplease see the completion_template field in the model's tokenizer_config.json file for completion template.",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "description": "The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.\nplease see the completion_template field in the model's tokenizer_config.json file for completion template."
         },
         "temperature": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.",
-          "example": 1.0,
-          "nullable": true
+          "example": 1.0
         },
         "top_p": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
-          "example": 0.95,
-          "nullable": true
+          "example": 0.95
         }
       }
     },
@@ -1424,11 +1489,13 @@
       ],
       "properties": {
         "best_of_sequences": {
-          "type": "array",
+          "type": [
+            "array",
+            "null"
+          ],
           "items": {
             "$ref": "#/components/schemas/BestOfSequence"
-          },
-          "nullable": true
+          }
         },
         "finish_reason": {
           "$ref": "#/components/schemas/FinishReason"
@@ -1446,10 +1513,12 @@
           }
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "example": 42,
-          "nullable": true,
           "minimum": 0
         },
         "tokens": {
@@ -1503,8 +1572,10 @@
           "type": "string"
         },
         "name": {
-          "type": "string",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ]
         }
       }
     },
@@ -1517,8 +1588,10 @@
       "properties": {
         "arguments": {},
         "description": {
-          "type": "string",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ]
         },
         "name": {
           "type": "string"
@@ -1540,18 +1613,22 @@
       "type": "object",
       "properties": {
         "adapter_id": {
-          "type": "string",
+          "type": [
+            "string",
+            "null"
+          ],
           "description": "Lora adapter id",
           "default": "null",
-          "example": "null",
-          "nullable": true
+          "example": "null"
         },
         "best_of": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "description": "Generate best_of sequences and return the one if the highest token logprobs.",
           "default": "null",
           "example": 1,
-          "nullable": true,
           "minimum": 0,
           "exclusiveMinimum": 0
         },
@@ -1572,55 +1649,68 @@
           "example": true
         },
         "frequency_penalty": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "The parameter for frequency penalty. 1.0 means no penalty\nPenalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
           "default": "null",
           "example": 0.1,
-          "nullable": true,
           "exclusiveMinimum": -2
         },
         "grammar": {
-          "allOf": [
-            {
-              "$ref": "#/components/schemas/GrammarType"
-            }
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "$ref": "#/components/schemas/GrammarType",
+              "description": "Grammar constraints for the generation."
+            }
           ],
-          "default": "null",
-          "nullable": true
+          "default": "null"
         },
         "max_new_tokens": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "Maximum number of tokens to generate.",
           "default": "1024",
           "example": "20",
-          "nullable": true,
           "minimum": 0
         },
         "repetition_penalty": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "The parameter for repetition penalty. 1.0 means no penalty.\nSee [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.",
           "default": "null",
           "example": 1.03,
-          "nullable": true,
           "exclusiveMinimum": 0
         },
         "return_full_text": {
-          "type": "boolean",
+          "type": [
+            "boolean",
+            "null"
+          ],
           "description": "Whether to prepend the prompt to the generated text",
           "default": "null",
-          "example": false,
-          "nullable": true
+          "example": false
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "description": "Random sampling seed.",
           "default": "null",
           "example": "null",
-          "nullable": true,
           "minimum": 0,
           "exclusiveMinimum": 0
         },
@@ -1636,58 +1726,70 @@
           "maxItems": 4
         },
         "temperature": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "The value used to module the logits distribution.",
           "default": "null",
           "example": 0.5,
-          "nullable": true,
           "exclusiveMinimum": 0
         },
         "top_k": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "The number of highest probability vocabulary tokens to keep for top-k-filtering.",
           "default": "null",
           "example": 10,
-          "nullable": true,
           "exclusiveMinimum": 0
         },
         "top_n_tokens": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int32",
           "description": "The number of highest probability vocabulary tokens to keep for top-n-filtering.",
           "default": "null",
           "example": 5,
-          "nullable": true,
           "minimum": 0,
           "exclusiveMinimum": 0
         },
         "top_p": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "Top-p value for nucleus sampling.",
           "default": "null",
           "example": 0.95,
-          "nullable": true,
           "maximum": 1,
           "exclusiveMinimum": 0
         },
         "truncate": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "description": "Truncate inputs tokens to the given size.",
           "default": "null",
           "example": "null",
-          "nullable": true,
           "minimum": 0
         },
         "typical_p": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
           "description": "Typical Decoding mass\nSee [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information.",
           "default": "null",
           "example": 0.95,
-          "nullable": true,
           "maximum": 1,
           "exclusiveMinimum": 0
         },
@@ -1721,12 +1823,14 @@
       ],
       "properties": {
         "details": {
-          "allOf": [
+          "oneOf": [
+            {
+              "type": "null"
+            },
             {
               "$ref": "#/components/schemas/Details"
             }
-          ],
-          "nullable": true
+          ]
         },
         "generated_text": {
           "type": "string",
@@ -1738,9 +1842,10 @@
       "oneOf": [
         {
           "type": "object",
+          "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions.",
           "required": [
-            "type",
-            "value"
+            "value",
+            "type"
           ],
           "properties": {
             "type": {
@@ -1752,13 +1857,20 @@
             "value": {
               "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions."
             }
+          },
+          "example": {
+            "properties": {
+              "location": {
+                "type": "string"
+              }
+            }
           }
         },
         {
           "type": "object",
           "required": [
-            "type",
-            "value"
+            "value",
+            "type"
          ],
          "properties": {
            "type": {
@@ -1772,10 +1884,7 @@
             }
           }
         }
-      ],
-      "discriminator": {
-        "propertyName": "type"
-      }
+      ]
     },
     "Info": {
       "type": "object",
@@ -1793,9 +1902,11 @@
       ],
       "properties": {
         "docker_label": {
-          "type": "string",
-          "example": "null",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "example": "null"
         },
         "max_best_of": {
           "type": "integer",
@@ -1834,14 +1945,18 @@
           "example": "bigscience/blomm-560m"
         },
         "model_pipeline_tag": {
-          "type": "string",
-          "example": "text-generation",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "example": "text-generation"
         },
         "model_sha": {
-          "type": "string",
-          "example": "e985a63cdc139290c5f700ff1929f0b5942cced2",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "example": "e985a63cdc139290c5f700ff1929f0b5942cced2"
         },
         "router": {
           "type": "string",
@@ -1849,9 +1964,11 @@
           "example": "text-generation-router"
         },
         "sha": {
-          "type": "string",
-          "example": "null",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "example": "null"
         },
         "validation_workers": {
           "type": "integer",
@@ -1867,21 +1984,49 @@
     "Message": {
       "type": "object",
       "required": [
-        "role",
-        "content"
+        "role"
       ],
       "properties": {
         "content": {
-          "$ref": "#/components/schemas/MessageContent"
+          "oneOf": [
+            {
+              "type": "null"
+            },
+            {
+              "oneOf": [
+                {
+                  "type": "string"
+                },
+                {
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/MessageChunk"
+                  }
+                }
+              ]
+            }
+          ]
         },
         "name": {
-          "type": "string",
-          "example": "\"David\"",
-          "nullable": true
+          "type": [
+            "string",
+            "null"
+          ],
+          "example": "\"David\""
         },
         "role": {
           "type": "string",
           "example": "user"
-        }
+        },
+        "tool_calls": {
+          "type": [
+            "array",
+            "null"
+          ],
+          "items": {
+            "$ref": "#/components/schemas/ToolCall"
+          },
+          "example": "null"
+        }
       }
     },
@@ -1923,10 +2068,7 @@
             }
           }
         }
-      ],
-      "discriminator": {
-        "propertyName": "type"
-      }
+      ]
     },
     "MessageContent": {
       "oneOf": [
@@ -1995,10 +2137,12 @@
           "minimum": 0
         },
         "logprob": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
-          "example": -0.34,
-          "nullable": true
+          "example": -0.34
         },
         "text": {
           "type": "string",
@@ -2106,10 +2250,12 @@
           "minimum": 0
         },
         "seed": {
-          "type": "integer",
+          "type": [
+            "integer",
+            "null"
+          ],
           "format": "int64",
           "example": 42,
-          "nullable": true,
           "minimum": 0
         }
       }
@@ -2135,19 +2281,23 @@
       ],
       "properties": {
         "details": {
-          "allOf": [
+          "oneOf": [
+            {
+              "type": "null"
+            },
             {
               "$ref": "#/components/schemas/StreamDetails"
             }
           ],
-          "default": "null",
-          "nullable": true
+          "default": "null"
         },
         "generated_text": {
-          "type": "string",
+          "type": [
+            "string",
+            "null"
+          ],
           "default": "null",
-          "example": "test",
-          "nullable": true
+          "example": "test"
         },
         "index": {
           "type": "integer",
@@ -2198,10 +2348,12 @@
           "minimum": 0
         },
         "logprob": {
-          "type": "number",
+          "type": [
+            "number",
+            "null"
+          ],
           "format": "float",
-          "example": -0.34,
-          "nullable": true
+          "example": -0.34
        },
        "special": {
          "type": "boolean",
@@ -2314,12 +2466,14 @@
       },
       {
         "type": "object",
+        "description": "Forces the model to call a specific tool. This structure aligns with the `OpenAI` API schema to force a specific tool.",
         "required": [
           "function"
         ],
         "properties": {
           "function": {
-            "$ref": "#/components/schemas/FunctionName"
+            "$ref": "#/components/schemas/FunctionName",
+            "description": "Forces the model to call a specific tool. This structure aligns with the `OpenAI` API schema to force a specific tool."
           }
         }
       }
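Two more utoipa 5 behaviors show up in the hunks above: an optional `$ref` is now documented as `"oneOf": [{"type": "null"}, {"$ref": ...}]` instead of an `"allOf"` wrapper plus `"nullable": true`, and the standalone `"discriminator"` blocks are dropped. A sketch of the Rust shape behind the `details` hunk (field list abridged and assumed from the schema names in the document, not copied from the source):

```rust
use utoipa::ToSchema;

// An optional reference to another schema. utoipa 4.x rendered `details` as
//   "allOf": [{"$ref": "#/components/schemas/Details"}], "nullable": true
// while utoipa 5.x renders it as
//   "oneOf": [{"type": "null"}, {"$ref": "#/components/schemas/Details"}]
#[derive(ToSchema)]
struct GenerateResponse {
    generated_text: String,
    details: Option<Details>,
}

#[derive(ToSchema)]
struct Details {
    finish_reason: String,
}
```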
@@ -1,11 +1,11 @@
 {
   "choices": [
     {
-      "finish_reason": "length",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "I'm an AI and do not have access to real-time data. However, based on location information (Paris) I can provide general information. \n\nThe temperature in Paris varies widely throughout the year. In the summer (June to August), the average high temperature is around 23°C (73°F), while in the winter (December to February), the average low temperature is around -1°C (30°F). \n\nTo get the current weather in Paris, I recommend checking a weather website or",
+        "content": "I can't access real-time data, but I can provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from this information, and I recommend checking the forecast on a reliable weather website for the most up-to-date information.",
         "name": null,
         "role": "assistant",
         "tool_calls": null
@@ -13,14 +13,14 @@
       "usage": null
     }
   ],
-  "created": 1739903191,
+  "created": 1739932427,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion",
   "system_fingerprint": "3.1.1-dev0-native",
   "usage": {
-    "completion_tokens": 100,
+    "completion_tokens": 79,
     "prompt_tokens": 103,
-    "total_tokens": 203
+    "total_tokens": 182
   }
 }
@@ -477,7 +477,7 @@ async def test_flash_llama_tool_reply_response(
 ):
     responses = await flash_llama_grammar_tools.chat(
         max_tokens=100,
-        seed=43,
+        seed=42,
         messages=[
             {"role": "user", "content": "What's the weather like in Paris today?"},
             {
@@ -503,7 +503,7 @@ async def test_flash_llama_tool_reply_response(
     assert responses.choices[0].message.tool_calls is None
     assert (
         responses.choices[0].message.content
-        == "I'm an AI and do not have access to real-time data. However, based on location information (Paris) I can provide general information. \n\nThe temperature in Paris varies widely throughout the year. In the summer (June to August), the average high temperature is around 23°C (73°F), while in the winter (December to February), the average low temperature is around -1°C (30°F). \n\nTo get the current weather in Paris, I recommend checking a weather website or"
+        == "I can't access real-time data, but I can provide you with current conditions and forecast for Paris, France:\n\nThe current conditions in Paris are mostly cloudy with a temperature of 6.7°C (44.1°F). \n\nPlease note that the actual weather may differ from this information, and I recommend checking the forecast on a reliable weather website for the most up-to-date information."
     )
 
     assert responses == response_snapshot
@@ -11,7 +11,7 @@ homepage.workspace = true
 anyhow = "1"
 async-trait = "0.1.74"
 async-stream = "0.3.5"
-axum = { version = "0.7", features = ["json"] }
+axum = { version = "0.8", features = ["json"] }
 axum-tracing-opentelemetry = "0.16"
 clap = { version = "4.4.5", features = ["derive", "env"] }
 futures = "0.3.28"
@@ -42,8 +42,8 @@ tower-http = { version = "0.5.1", features = ["cors"] }
 tracing = "0.1.40"
 tracing-opentelemetry = "0.21.0"
 tracing-subscriber = { version = "0.3.18", features = ["json", "env-filter"] }
-utoipa = { version = "4.2.0", features = ["axum_extras"] }
-utoipa-swagger-ui = { version = "6.0.0", features = ["axum"] }
+utoipa = { version = "5.3.1", features = ["axum_extras"] }
+utoipa-swagger-ui = { version = "9.0.0", features = ["axum"] }
 ngrok = { version = "0.13.1", features = ["axum"], optional = true }
 init-tracing-opentelemetry = { version = "0.14.1", features = [
 "opentelemetry-otlp",
@@ -949,19 +949,24 @@ mod tests {
             Message {
                 name: None,
                 role: "user".to_string(),
-                content: MessageContent::SingleText(
+                content: Some(MessageContent::SingleText(
                     "I'd like to show off how chat templating works!".to_string(),
-                ),
+                )),
+                tool_calls: None,
             },
             Message {
                 name: None,
                 role: "assistant".to_string(),
-                content: MessageContent::SingleText("Great! How can I help you today?".to_string()),
+                content: Some(MessageContent::SingleText(
+                    "Great! How can I help you today?".to_string(),
+                )),
+                tool_calls: None,
             },
             Message {
                 name: None,
                 role: "user".to_string(),
-                content: MessageContent::SingleText("Just testing".to_string()),
+                content: Some(MessageContent::SingleText("Just testing".to_string())),
+                tool_calls: None,
             },
         ];
         let tools_string = r#"[{"type": "function","function": {"name": "get_current_weather","description": "Get the current weather","parameters": {"type": "object","properties": {"location": {"type": "string","description": "The city and state, e.g. San Francisco, CA"},"format": {"type": "string","enum": ["celsius", "fahrenheit"],"description": "The temperature unit to use. Infer this from the users location."}},"required": ["location", "format"]}}}]"#.to_string();
@@ -985,17 +990,19 @@ mod tests {
             Message {
                 name: None,
                 role: "system".to_string(),
-                content: MessageContent::SingleText(
+                content: Some(MessageContent::SingleText(
                     "Youre a helpful assistant! Answer the users question best you can."
                         .to_string(),
-                ),
+                )),
+                tool_calls: None,
             },
             Message {
                 name: None,
                 role: "user".to_string(),
-                content: MessageContent::SingleText(
+                content: Some(MessageContent::SingleText(
                     "What is the weather like in Brooklyn, New York?".to_string(),
-                ),
+                )),
+                tool_calls: None,
             },
         ];
         let tools_string = r#"[{"type": "function","function": {"name": "get_current_weather","description": "Get the current weather","parameters": {"type": "object","properties": {"location": {"type": "string","description": "The city and state, e.g. San Francisco, CA"},"format": {"type": "string","enum": ["celsius", "fahrenheit"],"description": "The temperature unit to use. Infer this from the users location."}},"required": ["location", "format"]}}}]"#.to_string();
@@ -1025,7 +1025,7 @@ pub fn default_tool_prompt() -> String {
     "\nGiven the functions available, please respond with a JSON for a function call with its proper arguments that best answers the given prompt. Respond in the format {name: function name, parameters: dictionary of argument name and its value}.Do not use variables.\n".to_string()
 }
 
-#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
+#[derive(Clone, Debug, Deserialize, ToSchema, PartialEq, Serialize)]
 #[serde(tag = "type")]
 pub enum TypedChoice {
     #[serde(rename = "function")]
@@ -1100,19 +1100,19 @@ pub struct JsonSchemaTool {
     properties: Properties,
 }
 
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, ToSchema, PartialEq)]
 struct FunctionsMap {
     #[serde(rename = "$functions")]
     functions: std::collections::HashMap<String, serde_json::Value>,
 }
 
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, ToSchema, PartialEq)]
 struct FunctionRef {
     #[serde(rename = "$ref")]
     ref_path: String,
 }
 
-#[derive(Debug, Serialize, Deserialize, PartialEq)]
+#[derive(Debug, Serialize, Deserialize, ToSchema, PartialEq)]
 struct Properties {
     #[serde(serialize_with = "serialize_function")]
     function: Vec<FunctionRef>,
@@ -1176,7 +1176,7 @@ pub enum MessageChunk {
     ImageUrl { image_url: Url },
 }
 
-#[derive(Clone, Deserialize, ToSchema, Serialize, Debug, PartialEq)]
+#[derive(Clone, Deserialize, ToSchema, Serialize, Debug, PartialEq, Default)]
 pub struct Message {
     #[schema(example = "user")]
     role: String,
@@ -1185,6 +1185,7 @@ pub struct Message {
     #[serde(default, skip_serializing_if = "Option::is_none")]
     #[schema(example = "\"David\"")]
     name: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    tool_calls: Option<Vec<ToolCall>>,
 }
 
@@ -1573,8 +1574,11 @@ mod tests {
             request.messages[0],
             Message {
                 role: "user".to_string(),
-                content: MessageContent::SingleText("What is Deep Learning?".to_string()),
-                name: None
+                content: Some(MessageContent::SingleText(
+                    "What is Deep Learning?".to_string()
+                )),
+                name: None,
+                tool_calls: None
             }
         );
     }
@@ -1626,11 +1630,12 @@ mod tests {
             request.messages[0],
             Message{
                 role: "user".to_string(),
-                content: MessageContent::MultipleChunks(vec![
+                content: Some(MessageContent::MultipleChunks(vec![
                     MessageChunk::Text { text: "Whats in this image?".to_string() },
                     MessageChunk::ImageUrl { image_url: Url { url: "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png".to_string() }},
-                ]),
-                name: None
+                ])),
+                name: None,
+                tool_calls: None
             }
         );
     }
@@ -1639,11 +1644,12 @@ mod tests {
     fn text_message_convert() {
         let message = Message{
             role: "user".to_string(),
-            content: MessageContent::MultipleChunks(vec![
+            content: Some(MessageContent::MultipleChunks(vec![
                 MessageChunk::Text { text: "Whats in this image?".to_string() },
                 MessageChunk::ImageUrl { image_url: Url { url: "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/rabbit.png".to_string() } }
-            ]),
-            name: None
+            ])),
+            name: None,
+            tool_calls: None
         };
         let textmsg: TextMessage = message.into();
         assert_eq!(textmsg.content, "Whats in this image?");
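Taken together, the hunks above imply the following shape for `Message`. This is a sketch assembled from the diff, not the verbatim definition; the `content` field's `Option<MessageContent>` type is inferred from the `Some(...)` constructors in the tests, since its declaration falls outside the hunk context:

```rust
#[derive(Clone, Deserialize, ToSchema, Serialize, Debug, PartialEq, Default)]
pub struct Message {
    #[schema(example = "user")]
    role: String,
    // Now optional, so an assistant turn can carry only tool calls.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    content: Option<MessageContent>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[schema(example = "\"David\"")]
    name: Option<String>,
    // New field added by this commit.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<ToolCall>>,
}
```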
@@ -49,8 +49,8 @@ request_body = SagemakerRequest,
 responses(
 (status = 200, description = "Generated Chat Completion",
 content(
-("application/json" = SagemakerResponse),
-("text/event-stream" = SagemakerStreamResponse),
+(SagemakerResponse = "application/json"),
+(SagemakerStreamResponse = "text/event-stream"),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
@@ -111,8 +111,9 @@ request_body = CompatGenerateRequest,
 responses(
 (status = 200, description = "Generated Text",
 content(
-("application/json" = Vec<GenerateResponse>),
-("text/event-stream" = StreamResponse),
+(Vec<GenerateResponse> = "application/json"),
+(Vec<GenerateResponse> = "application/json"),
+(StreamResponse = "text/event-stream"),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation"})),
@@ -441,17 +442,17 @@ responses(
 (status = 200, description = "Generated Text", body = StreamResponse,
 content_type = "text/event-stream"),
 (status = 424, description = "Generation Error", body = ErrorResponse,
-example = json ! ({"error": "Request failed during generation"}),
-content_type = "text/event-stream"),
+content_type = "text/event-stream",
+example = json ! ({"error": "Request failed during generation"})),
 (status = 429, description = "Model is overloaded", body = ErrorResponse,
-example = json ! ({"error": "Model is overloaded"}),
-content_type = "text/event-stream"),
+content_type = "text/event-stream",
+example = json!({"error": "Model is overloaded"})),
 (status = 422, description = "Input validation error", body = ErrorResponse,
-example = json ! ({"error": "Input validation error"}),
-content_type = "text/event-stream"),
+content_type = "text/event-stream",
+example = json!({"error": "Input validation error"})),
 (status = 500, description = "Incomplete generation", body = ErrorResponse,
-example = json ! ({"error": "Incomplete generation"}),
-content_type = "text/event-stream"),
+content_type = "text/event-stream",
+example = json!({"error": "Incomplete generation"})),
 )
 )]
 #[instrument(
@@ -675,8 +676,8 @@ request_body = CompletionRequest,
 responses(
 (status = 200, description = "Generated Chat Completion",
 content(
-("application/json" = CompletionFinal),
-("text/event-stream" = Chunk),
+(CompletionFinal= "application/json"),
+(Chunk= "text/event-stream"),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation"})),
@@ -1201,8 +1202,8 @@ request_body = ChatRequest,
 responses(
 (status = 200, description = "Generated Chat Completion",
 content(
-("application/json" = ChatCompletion),
-("text/event-stream" = ChatCompletionChunk),
+(ChatCompletion = "application/json"),
+(ChatCompletionChunk = "text/event-stream"),
 )),
 (status = 424, description = "Generation Error", body = ErrorResponse,
 example = json ! ({"error": "Request failed during generation"})),
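The macro edits above track two utoipa 5 syntax changes: inside `content(...)` the schema now comes first and the media type second, and in the streaming error responses `content_type` is given before `example`. A sketch of the resulting attribute shape (the handler stub and the `serde_json::json!` import are placeholders, not code from this commit):

```rust
// Hypothetical handler illustrating the utoipa 5 `content(...)` form:
// `(Schema = "media/type")` rather than utoipa 4's `("media/type" = Schema)`.
use serde_json::json;

#[utoipa::path(
    post,
    path = "/v1/chat/completions",
    request_body = ChatRequest,
    responses(
        (status = 200, description = "Generated Chat Completion",
        content(
            (ChatCompletion = "application/json"),
            (ChatCompletionChunk = "text/event-stream"),
        )),
        (status = 424, description = "Generation Error", body = ErrorResponse,
        content_type = "text/event-stream",
        example = json!({"error": "Request failed during generation"})),
    )
)]
async fn chat_completions() {}
```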
@@ -170,8 +170,11 @@ mod tests {
         instances: vec![VertexInstance::Chat(ChatRequest {
             messages: vec![Message {
                 role: "user".to_string(),
-                content: MessageContent::SingleText("What's Deep Learning?".to_string()),
+                content: Some(MessageContent::SingleText(
+                    "What's Deep Learning?".to_string()
+                )),
                 name: None,
+                tool_calls: None,
             },],
             max_tokens: Some(128),
             top_p: Some(0.95),