mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
refactor usage stats (#2339)
* refactor usage stats * Update docs/source/usage_statistics.md Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> * Update router/src/server.rs Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> * changes based on feedback * run python3 udpate_doc.py * fix pre-commit * Update router/src/server.rs Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com> * delete option around usage stats arg --------- Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
f7f61876cf
commit
7451041ecd
@ -1,5 +1,5 @@
|
|||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
use text_generation_router::server;
|
use text_generation_router::{server, usage_stats};
|
||||||
use text_generation_router_v3::{connect_backend, V3Error};
|
use text_generation_router_v3::{connect_backend, V3Error};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
@ -68,10 +68,8 @@ struct Args {
|
|||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
#[clap(default_value = "4", long, env)]
|
#[clap(default_value = "4", long, env)]
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
#[clap(long, env, default_value_t)]
|
#[clap(default_value = "on", long, env)]
|
||||||
disable_usage_stats: bool,
|
usage_stats: usage_stats::UsageStatsLevel,
|
||||||
#[clap(long, env, default_value_t)]
|
|
||||||
disable_crash_reports: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Subcommand)]
|
#[derive(Debug, Subcommand)]
|
||||||
@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
ngrok_edge,
|
ngrok_edge,
|
||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
disable_usage_stats,
|
|
||||||
disable_crash_reports,
|
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
|
usage_stats,
|
||||||
} = args;
|
} = args;
|
||||||
|
|
||||||
if let Some(Commands::PrintSchema) = command {
|
if let Some(Commands::PrintSchema) = command {
|
||||||
@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
disable_usage_stats,
|
usage_stats,
|
||||||
disable_crash_reports,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -431,20 +431,18 @@ Options:
|
|||||||
[env: LORA_ADAPTERS=]
|
[env: LORA_ADAPTERS=]
|
||||||
|
|
||||||
```
|
```
|
||||||
## DISABLE_USAGE_STATS
|
## USAGE_STATS
|
||||||
```shell
|
```shell
|
||||||
--disable-usage-stats
|
--usage-stats <USAGE_STATS>
|
||||||
Disable sending of all usage statistics
|
Control if anonymous usage stats are collected. Options are "on", "off" and "no-stack". Default is on
|
||||||
|
|
||||||
[env: DISABLE_USAGE_STATS=]
|
[env: USAGE_STATS=]
|
||||||
|
[default: on]
|
||||||
|
|
||||||
```
|
Possible values:
|
||||||
## DISABLE_CRASH_REPORTS
|
- on: Default option, usage statistics are collected anonymously
|
||||||
```shell
|
- off: Disables all collection of usage statistics
|
||||||
--disable-crash-reports
|
- no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event
|
||||||
Disable sending of crash reports, but allow anonymous usage statistics
|
|
||||||
|
|
||||||
[env: DISABLE_CRASH_REPORTS=]
|
|
||||||
|
|
||||||
```
|
```
|
||||||
## HELP
|
## HELP
|
||||||
|
@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected:
|
|||||||
|
|
||||||
## How to opt-out
|
## How to opt-out
|
||||||
|
|
||||||
You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics.
|
By passing the `--usage-stats` flag to the text-generation-launcher command, you can control which usage statistics are collected.
|
||||||
|
`--usage-stats=no-stack` will not send the stack traces or error types of errors, but will continue to send start and stop events.
|
||||||
|
`--usage-stats=off` will completely disable the collection of usage statistics.
|
||||||
|
@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug, ValueEnum)]
|
||||||
|
pub enum UsageStatsLevel {
|
||||||
|
/// Default option, usage statistics are collected anonymously
|
||||||
|
On,
|
||||||
|
/// Disables all collection of usage statistics
|
||||||
|
Off,
|
||||||
|
/// Doesn't send the error stack trace or error type, but allows sending a crash event
|
||||||
|
NoStack,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for UsageStatsLevel {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
// Keep these strings in sync with the values accepted by the router's `--usage-stats` flag.
|
||||||
|
match self {
|
||||||
|
UsageStatsLevel::On => {
|
||||||
|
write!(f, "on")
|
||||||
|
}
|
||||||
|
UsageStatsLevel::Off => {
|
||||||
|
write!(f, "off")
|
||||||
|
}
|
||||||
|
UsageStatsLevel::NoStack => {
|
||||||
|
write!(f, "no-stack")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// App Configuration
|
/// App Configuration
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[clap(author, version, about, long_about = None)]
|
#[clap(author, version, about, long_about = None)]
|
||||||
@ -466,13 +493,11 @@ struct Args {
|
|||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
lora_adapters: Option<String>,
|
lora_adapters: Option<String>,
|
||||||
|
|
||||||
/// Disable sending of all usage statistics
|
/// Control if anonymous usage stats are collected.
|
||||||
#[clap(default_value = "false", long, env)]
|
/// Options are "on", "off" and "no-stack"
|
||||||
disable_usage_stats: bool,
|
/// Default is on.
|
||||||
|
#[clap(default_value = "on", long, env)]
|
||||||
/// Disable sending of crash reports, but allow anonymous usage statistics
|
usage_stats: UsageStatsLevel,
|
||||||
#[clap(default_value = "false", long, env)]
|
|
||||||
disable_crash_reports: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -1218,12 +1243,8 @@ fn spawn_webserver(
|
|||||||
];
|
];
|
||||||
|
|
||||||
// Pass usage stats flags to router
|
// Pass usage stats flags to router
|
||||||
if args.disable_usage_stats {
|
router_args.push("--usage-stats".to_string());
|
||||||
router_args.push("--disable-usage-stats".to_string());
|
router_args.push(args.usage_stats.to_string());
|
||||||
}
|
|
||||||
if args.disable_crash_reports {
|
|
||||||
router_args.push("--disable-crash-reports".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Grammar support
|
// Grammar support
|
||||||
if args.disable_grammar_support {
|
if args.disable_grammar_support {
|
||||||
|
@ -7,14 +7,13 @@ use crate::kserve::{
|
|||||||
kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
|
kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
|
||||||
kserve_model_metadata, kserve_model_metadata_ready,
|
kserve_model_metadata, kserve_model_metadata_ready,
|
||||||
};
|
};
|
||||||
use crate::usage_stats;
|
|
||||||
use crate::validation::ValidationError;
|
use crate::validation::ValidationError;
|
||||||
use crate::{
|
use crate::{
|
||||||
BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters,
|
usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
|
||||||
GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig,
|
GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
|
||||||
HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken,
|
HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent,
|
||||||
SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse,
|
OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token,
|
||||||
ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
|
TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,
|
ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,
|
||||||
@ -1505,8 +1504,7 @@ pub async fn run(
|
|||||||
messages_api_enabled: bool,
|
messages_api_enabled: bool,
|
||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
disable_usage_stats: bool,
|
usage_stats_level: usage_stats::UsageStatsLevel,
|
||||||
disable_crash_reports: bool,
|
|
||||||
) -> Result<(), WebServerError> {
|
) -> Result<(), WebServerError> {
|
||||||
// CORS allowed origins
|
// CORS allowed origins
|
||||||
// map to go inside the option and then map to parse from String to HeaderValue
|
// map to go inside the option and then map to parse from String to HeaderValue
|
||||||
@ -1698,8 +1696,8 @@ pub async fn run(
|
|||||||
|
|
||||||
// Only send usage stats when TGI is run in container and the function returns Some
|
// Only send usage stats when TGI is run in container and the function returns Some
|
||||||
let is_container = matches!(usage_stats::is_container(), Ok(true));
|
let is_container = matches!(usage_stats::is_container(), Ok(true));
|
||||||
|
let user_agent = match (usage_stats_level, is_container) {
|
||||||
let user_agent = if !disable_usage_stats && is_container {
|
(usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => {
|
||||||
let reduced_args = usage_stats::Args::new(
|
let reduced_args = usage_stats::Args::new(
|
||||||
config.clone(),
|
config.clone(),
|
||||||
tokenizer_config.tokenizer_class.clone(),
|
tokenizer_config.tokenizer_class.clone(),
|
||||||
@ -1719,12 +1717,11 @@ pub async fn run(
|
|||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
disable_usage_stats,
|
usage_stats_level,
|
||||||
disable_crash_reports,
|
|
||||||
);
|
);
|
||||||
Some(usage_stats::UserAgent::new(reduced_args))
|
Some(usage_stats::UserAgent::new(reduced_args))
|
||||||
} else {
|
}
|
||||||
None
|
_ => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(ref ua) = user_agent {
|
if let Some(ref ua) = user_agent {
|
||||||
@ -1780,21 +1777,18 @@ pub async fn run(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
if !disable_crash_reports {
|
let description = match usage_stats_level {
|
||||||
let error_event = usage_stats::UsageStatsEvent::new(
|
usage_stats::UsageStatsLevel::On => Some(e.to_string()),
|
||||||
|
usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
let event = usage_stats::UsageStatsEvent::new(
|
||||||
ua.clone(),
|
ua.clone(),
|
||||||
usage_stats::EventType::Error,
|
usage_stats::EventType::Error,
|
||||||
Some(e.to_string()),
|
description,
|
||||||
);
|
);
|
||||||
error_event.send().await;
|
event.send().await;
|
||||||
} else {
|
|
||||||
let unknow_error_event = usage_stats::UsageStatsEvent::new(
|
|
||||||
ua.clone(),
|
|
||||||
usage_stats::EventType::Error,
|
|
||||||
Some("unknow_error".to_string()),
|
|
||||||
);
|
|
||||||
unknow_error_event.send().await;
|
|
||||||
}
|
|
||||||
Err(e)
|
Err(e)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use crate::config::Config;
|
use crate::config::Config;
|
||||||
|
use clap::ValueEnum;
|
||||||
use csv::ReaderBuilder;
|
use csv::ReaderBuilder;
|
||||||
use reqwest::header::HeaderMap;
|
use reqwest::header::HeaderMap;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
@ -13,6 +14,13 @@ use uuid::Uuid;
|
|||||||
|
|
||||||
const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";
|
const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Serialize, ValueEnum)]
|
||||||
|
pub enum UsageStatsLevel {
|
||||||
|
On,
|
||||||
|
NoStack,
|
||||||
|
Off,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct UserAgent {
|
pub struct UserAgent {
|
||||||
pub uid: String,
|
pub uid: String,
|
||||||
@ -71,7 +79,7 @@ impl UsageStatsEvent {
|
|||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
pub struct Args {
|
pub struct Args {
|
||||||
model_config: Option<Config>,
|
model_config: Option<Config>,
|
||||||
tokenizer_config: Option<String>,
|
tokenizer_class: Option<String>,
|
||||||
max_concurrent_requests: usize,
|
max_concurrent_requests: usize,
|
||||||
max_best_of: usize,
|
max_best_of: usize,
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
@ -88,15 +96,14 @@ pub struct Args {
|
|||||||
messages_api_enabled: bool,
|
messages_api_enabled: bool,
|
||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
disable_usage_stats: bool,
|
usage_stats_level: UsageStatsLevel,
|
||||||
disable_crash_reports: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Args {
|
impl Args {
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
model_config: Option<Config>,
|
model_config: Option<Config>,
|
||||||
tokenizer_config: Option<String>,
|
tokenizer_class: Option<String>,
|
||||||
max_concurrent_requests: usize,
|
max_concurrent_requests: usize,
|
||||||
max_best_of: usize,
|
max_best_of: usize,
|
||||||
max_stop_sequences: usize,
|
max_stop_sequences: usize,
|
||||||
@ -113,12 +120,11 @@ impl Args {
|
|||||||
messages_api_enabled: bool,
|
messages_api_enabled: bool,
|
||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
disable_usage_stats: bool,
|
usage_stats_level: UsageStatsLevel,
|
||||||
disable_crash_reports: bool,
|
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
model_config,
|
model_config,
|
||||||
tokenizer_config,
|
tokenizer_class,
|
||||||
max_concurrent_requests,
|
max_concurrent_requests,
|
||||||
max_best_of,
|
max_best_of,
|
||||||
max_stop_sequences,
|
max_stop_sequences,
|
||||||
@ -135,8 +141,7 @@ impl Args {
|
|||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
disable_usage_stats,
|
usage_stats_level,
|
||||||
disable_crash_reports,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user