diff --git a/.redocly.lint-ignore.yaml b/.redocly.lint-ignore.yaml new file mode 100644 index 00000000..382c9ab6 --- /dev/null +++ b/.redocly.lint-ignore.yaml @@ -0,0 +1,79 @@ +# This file instructs Redocly's linter to ignore the rules contained for specific parts of your API. +# See https://redoc.ly/docs/cli/ for more information. +docs/openapi.json: + no-empty-servers: + - '#/openapi' + spec: + - >- + #/components/schemas/GenerateParameters/properties/best_of/exclusiveMinimum + - >- + #/components/schemas/GenerateParameters/properties/frequency_penalty/exclusiveMinimum + - '#/components/schemas/GenerateParameters/properties/grammar/nullable' + - >- + #/components/schemas/GenerateParameters/properties/repetition_penalty/exclusiveMinimum + - '#/components/schemas/GenerateParameters/properties/seed/exclusiveMinimum' + - >- + #/components/schemas/GenerateParameters/properties/temperature/exclusiveMinimum + - '#/components/schemas/GenerateParameters/properties/top_k/exclusiveMinimum' + - >- + #/components/schemas/GenerateParameters/properties/top_n_tokens/exclusiveMinimum + - '#/components/schemas/GenerateParameters/properties/top_p/exclusiveMinimum' + - >- + #/components/schemas/GenerateParameters/properties/typical_p/exclusiveMinimum + - '#/components/schemas/GenerateResponse/properties/details/nullable' + - '#/components/schemas/StreamResponse/properties/details/nullable' + - '#/components/schemas/ChatRequest/properties/response_format/nullable' + - '#/components/schemas/ChatRequest/properties/tool_choice/nullable' + - '#/components/schemas/ToolChoice/nullable' + - '#/components/schemas/ChatCompletionComplete/properties/logprobs/nullable' + - '#/components/schemas/ChatCompletionChoice/properties/logprobs/nullable' + no-invalid-media-type-examples: + - '#/paths/~1/post/responses/422/content/application~1json/example' + - '#/paths/~1/post/responses/424/content/application~1json/example' + - '#/paths/~1/post/responses/429/content/application~1json/example' + - '#/paths/~1/post/responses/500/content/application~1json/example' + - '#/paths/~1generate/post/responses/422/content/application~1json/example' + - '#/paths/~1generate/post/responses/424/content/application~1json/example' + - '#/paths/~1generate/post/responses/429/content/application~1json/example' + - '#/paths/~1generate/post/responses/500/content/application~1json/example' + - >- + #/paths/~1generate_stream/post/responses/422/content/text~1event-stream/example + - >- + #/paths/~1generate_stream/post/responses/424/content/text~1event-stream/example + - >- + #/paths/~1generate_stream/post/responses/429/content/text~1event-stream/example + - >- + #/paths/~1generate_stream/post/responses/500/content/text~1event-stream/example + - '#/paths/~1tokenize/post/responses/404/content/application~1json/example' + - >- + #/paths/~1v1~1chat~1completions/post/responses/422/content/application~1json/example + - >- + #/paths/~1v1~1chat~1completions/post/responses/424/content/application~1json/example + - >- + #/paths/~1v1~1chat~1completions/post/responses/429/content/application~1json/example + - >- + #/paths/~1v1~1chat~1completions/post/responses/500/content/application~1json/example + - >- + #/paths/~1v1~1completions/post/responses/422/content/application~1json/example + - >- + #/paths/~1v1~1completions/post/responses/424/content/application~1json/example + - >- + #/paths/~1v1~1completions/post/responses/429/content/application~1json/example + - >- + #/paths/~1v1~1completions/post/responses/500/content/application~1json/example + operation-4xx-response: + - '#/paths/~1health/get/responses' + - '#/paths/~1info/get/responses' + - '#/paths/~1metrics/get/responses' + no-unused-components: + - '#/components/schemas/Completion' + security-defined: + - '#/paths/~1/post' + - '#/paths/~1generate/post' + - '#/paths/~1generate_stream/post' + - '#/paths/~1health/get' + - '#/paths/~1info/get' + - '#/paths/~1metrics/get' + - '#/paths/~1tokenize/post' + - '#/paths/~1v1~1chat~1completions/post' + - '#/paths/~1v1~1completions/post' diff --git a/backends/v3/src/main.rs b/backends/v3/src/main.rs index dfa90682..52e13cfa 100644 --- a/backends/v3/src/main.rs +++ b/backends/v3/src/main.rs @@ -113,19 +113,14 @@ async fn main() -> Result<(), RouterError> { max_client_batch_size, } = args; - let print_schema_command = match command { - Some(Commands::PrintSchema) => true, - None => { - // only init logging if we are not running the print schema command - text_generation_router::logging::init_logging( - otlp_endpoint, - otlp_service_name, - json_output, - ); - false - } + if let Some(Commands::PrintSchema) = command { + use utoipa::OpenApi; + let api_doc = text_generation_router::server::ApiDoc::openapi(); + let api_doc = serde_json::to_string_pretty(&api_doc).unwrap(); + println!("{}", api_doc); + std::process::exit(0); }; - // Launch Tokio runtime + text_generation_router::logging::init_logging(otlp_endpoint, otlp_service_name, json_output); // Validate args if max_input_tokens >= max_total_tokens { @@ -187,7 +182,6 @@ async fn main() -> Result<(), RouterError> { messages_api_enabled, disable_grammar_support, max_client_batch_size, - print_schema_command, ) .await?; Ok(()) diff --git a/docs/openapi.json b/docs/openapi.json index db163ca0..ed9b0b96 100644 --- a/docs/openapi.json +++ b/docs/openapi.json @@ -1580,16 +1580,11 @@ "type": "object", "required": [ "model_id", - "model_dtype", - "model_device_type", "max_concurrent_requests", "max_best_of", "max_stop_sequences", "max_input_tokens", "max_total_tokens", - "waiting_served_ratio", - "max_batch_total_tokens", - "max_waiting_tokens", "validation_workers", "max_client_batch_size", "router", @@ -1601,18 +1596,6 @@ "example": "null", "nullable": true }, - "max_batch_size": { - "type": "integer", - "example": "null", - "nullable": true, - "minimum": 0 - }, - "max_batch_total_tokens": { - "type": "integer", - "format": "int32", - "example": "32000", - "minimum": 0 - }, "max_best_of": { "type": "integer", "example": "2", @@ -1644,19 +1627,6 @@ "example": "2048", "minimum": 0 }, - "max_waiting_tokens": { - "type": "integer", - "example": "20", - "minimum": 0 - }, - "model_device_type": { - "type": "string", - "example": "cuda" - }, - "model_dtype": { - "type": "string", - "example": "torch.float16" - }, "model_id": { "type": "string", "description": "Model info", @@ -1690,11 +1660,6 @@ "version": { "type": "string", "example": "0.5.0" - }, - "waiting_served_ratio": { - "type": "number", - "format": "float", - "example": "1.2" } } }, diff --git a/router/src/server.rs b/router/src/server.rs index 11e84359..c3b32c00 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1396,6 +1396,90 @@ async fn metrics(prom_handle: Extension) -> String { #[derive(Clone, Debug)] pub(crate) struct ComputeType(String); +// OpenAPI documentation +#[derive(OpenApi)] +#[openapi( +paths( +health, +get_model_info, +compat_generate, +generate, +generate_stream, +chat_completions, +completions, +tokenize, +metrics, +), +components( +schemas( +Info, +CompatGenerateRequest, +GenerateRequest, +GrammarType, +ChatRequest, +Message, +MessageContent, +MessageChunk, +Url, +FunctionName, +OutputMessage, +TextMessage, +ToolCallMessage, +ToolCallDelta, +ChatCompletionComplete, +ChatCompletionChoice, +ChatCompletionDelta, +ChatCompletionChunk, +ChatCompletionLogprob, +ChatCompletionLogprobs, +ChatCompletionTopLogprob, +ChatCompletion, +CompletionRequest, +CompletionComplete, +Chunk, +Completion, +CompletionFinal, +Prompt, +GenerateParameters, +PrefillToken, +Token, +GenerateResponse, +TokenizeResponse, +SimpleToken, +BestOfSequence, +Details, +FinishReason, +StreamResponse, +StreamDetails, +ErrorResponse, +GrammarType, +Usage, +DeltaToolCall, +ToolType, +Tool, +ToolCall, +Function, +FunctionDefinition, +ToolChoice, +) +), +tags( +(name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API") +), +info( +title = "Text Generation Inference", +license( +name = "Apache 2.0", +url = "https://www.apache.org/licenses/LICENSE-2.0" +) +) +)] +pub struct ApiDoc; + +pub fn schema() -> ApiDoc { + ApiDoc +} + /// Serving method #[allow(clippy::too_many_arguments)] pub async fn run( @@ -1420,95 +1504,7 @@ pub async fn run( messages_api_enabled: bool, grammar_support: bool, max_client_batch_size: usize, - print_schema_command: bool, ) -> Result<(), WebServerError> { - // OpenAPI documentation - #[derive(OpenApi)] - #[openapi( - paths( - health, - get_model_info, - compat_generate, - generate, - generate_stream, - chat_completions, - completions, - tokenize, - metrics, - ), - components( - schemas( - Info, - CompatGenerateRequest, - GenerateRequest, - GrammarType, - ChatRequest, - Message, - MessageContent, - MessageChunk, - Url, - FunctionName, - OutputMessage, - TextMessage, - ToolCallMessage, - ToolCallDelta, - ChatCompletionComplete, - ChatCompletionChoice, - ChatCompletionDelta, - ChatCompletionChunk, - ChatCompletionLogprob, - ChatCompletionLogprobs, - ChatCompletionTopLogprob, - ChatCompletion, - CompletionRequest, - CompletionComplete, - Chunk, - Completion, - CompletionFinal, - Prompt, - GenerateParameters, - PrefillToken, - Token, - GenerateResponse, - TokenizeResponse, - SimpleToken, - BestOfSequence, - Details, - FinishReason, - StreamResponse, - StreamDetails, - ErrorResponse, - GrammarType, - Usage, - DeltaToolCall, - ToolType, - Tool, - ToolCall, - Function, - FunctionDefinition, - ToolChoice, - ) - ), - tags( - (name = "Text Generation Inference", description = "Hugging Face Text Generation Inference API") - ), - info( - title = "Text Generation Inference", - license( - name = "Apache 2.0", - url = "https://www.apache.org/licenses/LICENSE-2.0" - ) - ) - )] - struct ApiDoc; - - // Create state - if print_schema_command { - let api_doc = ApiDoc::openapi(); - let api_doc = serde_json::to_string_pretty(&api_doc).unwrap(); - println!("{}", api_doc); - std::process::exit(0); - } // CORS allowed origins // map to go inside the option and then map to parse from String to HeaderValue // Finally, convert to AllowOrigin diff --git a/update_doc.py b/update_doc.py index bfa7e4e9..428d4452 100644 --- a/update_doc.py +++ b/update_doc.py @@ -167,22 +167,24 @@ def check_openapi(check: bool): else: os.rename(tmp_filename, filename) print("OpenAPI documentation updated.") - errors = subprocess.run( + p = subprocess.run( [ - "swagger-cli", + "redocly", # allow for trailing whitespace since it's not significant # and the precommit hook will remove it - "validate", + "lint", filename, ], capture_output=True, - ).stderr.decode("utf-8") + ) + errors = p.stderr.decode("utf-8") # The openapi specs fails on `exclusive_minimum` which is expected to be a boolean where # utoipa outputs a value instead: https://github.com/juhaku/utoipa/issues/969 - if not errors.startswith("Swagger schema validation failed."): + print(errors) + if p.returncode != 0: print(errors) raise Exception( - f"OpenAPI documentation is invalid, `swagger-cli validate` showed some error:\n {errors}" + f"OpenAPI documentation is invalid, `redocly lint {filename}` showed some error:\n {errors}" ) return True