diff --git a/backends/v3/src/main.rs b/backends/v3/src/main.rs index ef10514f..21952e66 100644 --- a/backends/v3/src/main.rs +++ b/backends/v3/src/main.rs @@ -1,5 +1,5 @@ use clap::{Parser, Subcommand}; -use text_generation_router::server; +use text_generation_router::{server, usage_stats}; use text_generation_router_v3::{connect_backend, V3Error}; use thiserror::Error; @@ -68,10 +68,8 @@ struct Args { disable_grammar_support: bool, #[clap(default_value = "4", long, env)] max_client_batch_size: usize, - #[clap(long, env, default_value_t)] - disable_usage_stats: bool, - #[clap(long, env, default_value_t)] - disable_crash_reports: bool, + #[clap(default_value = "on", long, env)] + usage_stats: usage_stats::UsageStatsLevel, } #[derive(Debug, Subcommand)] @@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> { ngrok_edge, messages_api_enabled, disable_grammar_support, - disable_usage_stats, - disable_crash_reports, max_client_batch_size, + usage_stats, } = args; if let Some(Commands::PrintSchema) = command { @@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> { messages_api_enabled, disable_grammar_support, max_client_batch_size, - disable_usage_stats, - disable_crash_reports, + usage_stats, ) .await?; Ok(()) diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md index ce98876f..01f15648 100644 --- a/docs/source/basic_tutorials/launcher.md +++ b/docs/source/basic_tutorials/launcher.md @@ -431,20 +431,18 @@ Options: [env: LORA_ADAPTERS=] ``` -## DISABLE_USAGE_STATS +## USAGE_STATS ```shell - --disable-usage-stats - Disable sending of all usage statistics + --usage-stats + Control if anonymous usage stats are collected. 
Options are "on", "off" and "no-stack" Default is on - [env: DISABLE_USAGE_STATS=] + [env: USAGE_STATS=] + [default: on] -``` -## DISABLE_CRASH_REPORTS -```shell - --disable-crash-reports - Disable sending of crash reports, but allow anonymous usage statistics - - [env: DISABLE_CRASH_REPORTS=] + Possible values: + - on: Default option, usage statistics are collected anonymously + - off: Disables all collection of usage statistics + - no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event ``` ## HELP diff --git a/docs/source/usage_statistics.md b/docs/source/usage_statistics.md index adf0d70f..a2c406ec 100644 --- a/docs/source/usage_statistics.md +++ b/docs/source/usage_statistics.md @@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected: ## How to opt-out -You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics. +By passing the `--usage-stats` flag to the text-generation-launcher command you can control how many usage statistics are collected. 
+`--usage-stats=no-stack` will not emit the stack traces from errors and the error types, but will continue to send start and stop events +`--usage-stats=off` will completely disable everything diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 0e1405d4..8acfda0c 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling { } } +#[derive(Clone, Copy, Debug, ValueEnum)] +pub enum UsageStatsLevel { + /// Default option, usage statistics are collected anonymously + On, + /// Disables all collection of usage statistics + Off, + /// Doesn't send the error stack trace or error type, but allows sending a crash event + NoStack, +} + +impl std::fmt::Display for UsageStatsLevel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // To keep in sync with `server`. + match self { + UsageStatsLevel::On => { + write!(f, "on") + } + UsageStatsLevel::Off => { + write!(f, "off") + } + UsageStatsLevel::NoStack => { + write!(f, "no-stack") + } + } + } +} + /// App Configuration #[derive(Parser, Debug)] #[clap(author, version, about, long_about = None)] @@ -466,13 +493,11 @@ struct Args { #[clap(long, env)] lora_adapters: Option, - /// Disable sending of all usage statistics - #[clap(default_value = "false", long, env)] - disable_usage_stats: bool, - - /// Disable sending of crash reports, but allow anonymous usage statistics - #[clap(default_value = "false", long, env)] - disable_crash_reports: bool, + /// Control if anonymous usage stats are collected. + /// Options are "on", "off" and "no-stack" + /// Default is on. 
+ #[clap(default_value = "on", long, env)] + usage_stats: UsageStatsLevel, } #[derive(Debug)] @@ -1218,12 +1243,8 @@ fn spawn_webserver( ]; // Pass usage stats flags to router - if args.disable_usage_stats { - router_args.push("--disable-usage-stats".to_string()); - } - if args.disable_crash_reports { - router_args.push("--disable-crash-reports".to_string()); - } + router_args.push("--usage-stats".to_string()); + router_args.push(args.usage_stats.to_string()); // Grammar support if args.disable_grammar_support { diff --git a/router/src/server.rs b/router/src/server.rs index ccbd1535..dcbaa2ad 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -7,14 +7,13 @@ use crate::kserve::{ kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer, kserve_model_metadata, kserve_model_metadata_ready, }; -use crate::usage_stats; use crate::validation::ValidationError; use crate::{ - BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters, - GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig, - HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken, - SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse, - ToolCallDelta, ToolCallMessage, Url, Usage, Validation, + usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, + GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, + HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, + OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, + TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation, }; use crate::{ ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete, @@ -1505,8 +1504,7 @@ pub async fn run( messages_api_enabled: bool, disable_grammar_support: bool, max_client_batch_size: usize, - 
disable_usage_stats: bool, - disable_crash_reports: bool, + usage_stats_level: usage_stats::UsageStatsLevel, ) -> Result<(), WebServerError> { // CORS allowed origins // map to go inside the option and then map to parse from String to HeaderValue @@ -1698,33 +1696,32 @@ pub async fn run( // Only send usage stats when TGI is run in container and the function returns Some let is_container = matches!(usage_stats::is_container(), Ok(true)); - - let user_agent = if !disable_usage_stats && is_container { - let reduced_args = usage_stats::Args::new( - config.clone(), - tokenizer_config.tokenizer_class.clone(), - max_concurrent_requests, - max_best_of, - max_stop_sequences, - max_top_n_tokens, - max_input_tokens, - max_total_tokens, - // waiting_served_ratio, - // max_batch_prefill_tokens, - // max_batch_total_tokens, - // max_waiting_tokens, - // max_batch_size, - revision.clone(), - validation_workers, - messages_api_enabled, - disable_grammar_support, - max_client_batch_size, - disable_usage_stats, - disable_crash_reports, - ); - Some(usage_stats::UserAgent::new(reduced_args)) - } else { - None + let user_agent = match (usage_stats_level, is_container) { + (usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => { + let reduced_args = usage_stats::Args::new( + config.clone(), + tokenizer_config.tokenizer_class.clone(), + max_concurrent_requests, + max_best_of, + max_stop_sequences, + max_top_n_tokens, + max_input_tokens, + max_total_tokens, + // waiting_served_ratio, + // max_batch_prefill_tokens, + // max_batch_total_tokens, + // max_waiting_tokens, + // max_batch_size, + revision.clone(), + validation_workers, + messages_api_enabled, + disable_grammar_support, + max_client_batch_size, + usage_stats_level, + ); + Some(usage_stats::UserAgent::new(reduced_args)) + } + _ => None, }; if let Some(ref ua) = user_agent { @@ -1780,21 +1777,18 @@ pub async fn run( Ok(()) } Err(e) => { - if !disable_crash_reports { - let error_event = 
usage_stats::UsageStatsEvent::new( - ua.clone(), - usage_stats::EventType::Error, - Some(e.to_string()), - ); - error_event.send().await; - } else { - let unknow_error_event = usage_stats::UsageStatsEvent::new( - ua.clone(), - usage_stats::EventType::Error, - Some("unknow_error".to_string()), - ); - unknow_error_event.send().await; - } + let description = match usage_stats_level { + usage_stats::UsageStatsLevel::On => Some(e.to_string()), + usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()), + _ => None, + }; + let event = usage_stats::UsageStatsEvent::new( + ua.clone(), + usage_stats::EventType::Error, + description, + ); + event.send().await; + Err(e) } } diff --git a/router/src/usage_stats.rs b/router/src/usage_stats.rs index fa9f3637..0282ac63 100644 --- a/router/src/usage_stats.rs +++ b/router/src/usage_stats.rs @@ -1,4 +1,5 @@ use crate::config::Config; +use clap::ValueEnum; use csv::ReaderBuilder; use reqwest::header::HeaderMap; use serde::Serialize; @@ -13,6 +14,13 @@ use uuid::Uuid; const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi"; +#[derive(Copy, Clone, Debug, Serialize, ValueEnum)] +pub enum UsageStatsLevel { + On, + NoStack, + Off, +} + #[derive(Debug, Clone, Serialize)] pub struct UserAgent { pub uid: String, @@ -71,7 +79,7 @@ impl UsageStatsEvent { #[derive(Debug, Clone, Serialize)] pub struct Args { model_config: Option, - tokenizer_config: Option, + tokenizer_class: Option, max_concurrent_requests: usize, max_best_of: usize, max_stop_sequences: usize, @@ -88,15 +96,14 @@ pub struct Args { messages_api_enabled: bool, disable_grammar_support: bool, max_client_batch_size: usize, - disable_usage_stats: bool, - disable_crash_reports: bool, + usage_stats_level: UsageStatsLevel, } impl Args { #[allow(clippy::too_many_arguments)] pub fn new( model_config: Option, - tokenizer_config: Option, + tokenizer_class: Option, max_concurrent_requests: usize, max_best_of: usize, max_stop_sequences: usize, @@ -113,12 +120,11 
@@ impl Args { messages_api_enabled: bool, disable_grammar_support: bool, max_client_batch_size: usize, - disable_usage_stats: bool, - disable_crash_reports: bool, + usage_stats_level: UsageStatsLevel, ) -> Self { Self { model_config, - tokenizer_config, + tokenizer_class, max_concurrent_requests, max_best_of, max_stop_sequences, @@ -135,8 +141,7 @@ impl Args { messages_api_enabled, disable_grammar_support, max_client_batch_size, - disable_usage_stats, - disable_crash_reports, + usage_stats_level, } } }