mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
update doc
This commit is contained in:
parent
70483428ee
commit
2c8a51a474
@ -321,6 +321,7 @@
|
|||||||
"tags": [
|
"tags": [
|
||||||
"Text Generation Inference"
|
"Text Generation Inference"
|
||||||
],
|
],
|
||||||
|
"summary": "Generate tokens from Sagemaker request",
|
||||||
"operationId": "sagemaker_compatibility",
|
"operationId": "sagemaker_compatibility",
|
||||||
"requestBody": {
|
"requestBody": {
|
||||||
"content": {
|
"content": {
|
||||||
@ -356,7 +357,8 @@
|
|||||||
"$ref": "#/components/schemas/ErrorResponse"
|
"$ref": "#/components/schemas/ErrorResponse"
|
||||||
},
|
},
|
||||||
"example": {
|
"example": {
|
||||||
"error": "Input validation error"
|
"error": "Input validation error",
|
||||||
|
"error_type": "validation"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -369,7 +371,8 @@
|
|||||||
"$ref": "#/components/schemas/ErrorResponse"
|
"$ref": "#/components/schemas/ErrorResponse"
|
||||||
},
|
},
|
||||||
"example": {
|
"example": {
|
||||||
"error": "Request failed during generation"
|
"error": "Request failed during generation",
|
||||||
|
"error_type": "generation"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -382,7 +385,8 @@
|
|||||||
"$ref": "#/components/schemas/ErrorResponse"
|
"$ref": "#/components/schemas/ErrorResponse"
|
||||||
},
|
},
|
||||||
"example": {
|
"example": {
|
||||||
"error": "Model is overloaded"
|
"error": "Model is overloaded",
|
||||||
|
"error_type": "overloaded"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -395,7 +399,8 @@
|
|||||||
"$ref": "#/components/schemas/ErrorResponse"
|
"$ref": "#/components/schemas/ErrorResponse"
|
||||||
},
|
},
|
||||||
"example": {
|
"example": {
|
||||||
"error": "Incomplete generation"
|
"error": "Incomplete generation",
|
||||||
|
"error_type": "incomplete_generation"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ pub(crate) enum SagemakerRequest {
|
|||||||
Completion(CompletionRequest),
|
Completion(CompletionRequest),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Used for OpenAPI specs
|
// Used for OpenAPI specs
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(Serialize, ToSchema)]
|
#[derive(Serialize, ToSchema)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
@ -30,7 +30,7 @@ pub(crate) enum SagemakerResponse {
|
|||||||
Completion(CompletionFinal),
|
Completion(CompletionFinal),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Used for OpenAPI specs
|
// Used for OpenAPI specs
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(Serialize, ToSchema)]
|
#[derive(Serialize, ToSchema)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
@ -40,7 +40,7 @@ pub(crate) enum SagemakerStreamResponse {
|
|||||||
Completion(Chunk),
|
Completion(Chunk),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate tokens from Sagemaker request
|
/// Generate tokens from Sagemaker request
|
||||||
#[utoipa::path(
|
#[utoipa::path(
|
||||||
post,
|
post,
|
||||||
tag = "Text Generation Inference",
|
tag = "Text Generation Inference",
|
||||||
@ -53,13 +53,13 @@ content(
|
|||||||
("text/event-stream" = SagemakerStreamResponse),
|
("text/event-stream" = SagemakerStreamResponse),
|
||||||
)),
|
)),
|
||||||
(status = 424, description = "Generation Error", body = ErrorResponse,
|
(status = 424, description = "Generation Error", body = ErrorResponse,
|
||||||
example = json ! ({"error": "Request failed during generation"})),
|
example = json ! ({"error": "Request failed during generation", "error_type": "generation"})),
|
||||||
(status = 429, description = "Model is overloaded", body = ErrorResponse,
|
(status = 429, description = "Model is overloaded", body = ErrorResponse,
|
||||||
example = json ! ({"error": "Model is overloaded"})),
|
example = json ! ({"error": "Model is overloaded", "error_type": "overloaded"})),
|
||||||
(status = 422, description = "Input validation error", body = ErrorResponse,
|
(status = 422, description = "Input validation error", body = ErrorResponse,
|
||||||
example = json ! ({"error": "Input validation error"})),
|
example = json ! ({"error": "Input validation error", "error_type": "validation"})),
|
||||||
(status = 500, description = "Incomplete generation", body = ErrorResponse,
|
(status = 500, description = "Incomplete generation", body = ErrorResponse,
|
||||||
example = json ! ({"error": "Incomplete generation"})),
|
example = json ! ({"error": "Incomplete generation", "error_type": "incomplete_generation"})),
|
||||||
)
|
)
|
||||||
)]
|
)]
|
||||||
#[instrument(skip_all)]
|
#[instrument(skip_all)]
|
||||||
|
@ -172,6 +172,8 @@ def check_openapi(check: bool):
|
|||||||
# allow for trailing whitespace since it's not significant
|
# allow for trailing whitespace since it's not significant
|
||||||
# and the precommit hook will remove it
|
# and the precommit hook will remove it
|
||||||
"lint",
|
"lint",
|
||||||
|
"--skip-rule",
|
||||||
|
"security-defined",
|
||||||
filename,
|
filename,
|
||||||
],
|
],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
|
Loading…
Reference in New Issue
Block a user