refactor usage stats (#2339)

* refactor usage stats

* Update docs/source/usage_statistics.md

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* Update router/src/server.rs

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* changes based on feedback

* run python3 udpate_doc.py

* fix pre-commit

* Update router/src/server.rs

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* delete option around usage stats arg

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Erik Kaunismäki 2024-07-31 16:29:07 +02:00 committed by GitHub
parent f7f61876cf
commit 7451041ecd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 109 additions and 93 deletions

View File

@ -1,5 +1,5 @@
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use text_generation_router::server; use text_generation_router::{server, usage_stats};
use text_generation_router_v3::{connect_backend, V3Error}; use text_generation_router_v3::{connect_backend, V3Error};
use thiserror::Error; use thiserror::Error;
@ -68,10 +68,8 @@ struct Args {
disable_grammar_support: bool, disable_grammar_support: bool,
#[clap(default_value = "4", long, env)] #[clap(default_value = "4", long, env)]
max_client_batch_size: usize, max_client_batch_size: usize,
#[clap(long, env, default_value_t)] #[clap(default_value = "on", long, env)]
disable_usage_stats: bool, usage_stats: usage_stats::UsageStatsLevel,
#[clap(long, env, default_value_t)]
disable_crash_reports: bool,
} }
#[derive(Debug, Subcommand)] #[derive(Debug, Subcommand)]
@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> {
ngrok_edge, ngrok_edge,
messages_api_enabled, messages_api_enabled,
disable_grammar_support, disable_grammar_support,
disable_usage_stats,
disable_crash_reports,
max_client_batch_size, max_client_batch_size,
usage_stats,
} = args; } = args;
if let Some(Commands::PrintSchema) = command { if let Some(Commands::PrintSchema) = command {
@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> {
messages_api_enabled, messages_api_enabled,
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
disable_usage_stats, usage_stats,
disable_crash_reports,
) )
.await?; .await?;
Ok(()) Ok(())

View File

@ -431,20 +431,18 @@ Options:
[env: LORA_ADAPTERS=] [env: LORA_ADAPTERS=]
``` ```
## DISABLE_USAGE_STATS ## USAGE_STATS
```shell ```shell
--disable-usage-stats --usage-stats <USAGE_STATS>
Disable sending of all usage statistics Control if anonymous usage stats are collected. Options are "on", "off" and "no-stack" Default is on
[env: DISABLE_USAGE_STATS=] [env: USAGE_STATS=]
[default: on]
``` Possible values:
## DISABLE_CRASH_REPORTS - on: Default option, usage statistics are collected anonymously
```shell - off: Disables all collection of usage statistics
--disable-crash-reports - no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event
Disable sending of crash reports, but allow anonymous usage statistics
[env: DISABLE_CRASH_REPORTS=]
``` ```
## HELP ## HELP

View File

@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected:
## How to opt-out ## How to opt-out
You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics. By passing the `--usage-stats` to the text-generation-launcher you can control how much usage statistics are being collected.
`--usage-stats=no-stack` will not emit the stack traces from errors and the error types, but will continue to send start and stop events
`--usage-stats=off` will completely disable everything

View File

@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling {
} }
} }
/// How much anonymous usage telemetry is collected, selectable via `--usage-stats`.
#[derive(Clone, Copy, Debug, ValueEnum)]
pub enum UsageStatsLevel {
    /// Default option, usage statistics are collected anonymously
    On,
    /// Disables all collection of usage statistics
    Off,
    /// Doesn't send the error stack trace or error type, but allows sending a crash event
    NoStack,
}
impl std::fmt::Display for UsageStatsLevel {
    /// Renders the level as the CLI-facing string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE: keep these labels in sync with the router/`server` side,
        // since the launcher forwards this value verbatim on the command line.
        let label = match self {
            UsageStatsLevel::On => "on",
            UsageStatsLevel::Off => "off",
            UsageStatsLevel::NoStack => "no-stack",
        };
        write!(f, "{}", label)
    }
}
/// App Configuration /// App Configuration
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)] #[clap(author, version, about, long_about = None)]
@ -466,13 +493,11 @@ struct Args {
#[clap(long, env)] #[clap(long, env)]
lora_adapters: Option<String>, lora_adapters: Option<String>,
/// Disable sending of all usage statistics /// Control if anonymous usage stats are collected.
#[clap(default_value = "false", long, env)] /// Options are "on", "off" and "no-stack"
disable_usage_stats: bool, /// Default is on.
#[clap(default_value = "on", long, env)]
/// Disable sending of crash reports, but allow anonymous usage statistics usage_stats: UsageStatsLevel,
#[clap(default_value = "false", long, env)]
disable_crash_reports: bool,
} }
#[derive(Debug)] #[derive(Debug)]
@ -1218,12 +1243,8 @@ fn spawn_webserver(
]; ];
// Pass usage stats flags to router // Pass usage stats flags to router
if args.disable_usage_stats { router_args.push("--usage-stats".to_string());
router_args.push("--disable-usage-stats".to_string()); router_args.push(args.usage_stats.to_string());
}
if args.disable_crash_reports {
router_args.push("--disable-crash-reports".to_string());
}
// Grammar support // Grammar support
if args.disable_grammar_support { if args.disable_grammar_support {

View File

@ -7,14 +7,13 @@ use crate::kserve::{
kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer, kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
kserve_model_metadata, kserve_model_metadata_ready, kserve_model_metadata, kserve_model_metadata_ready,
}; };
use crate::usage_stats;
use crate::validation::ValidationError; use crate::validation::ValidationError;
use crate::{ use crate::{
BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters, usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig, GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken, HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent,
SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse, OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token,
ToolCallDelta, ToolCallMessage, Url, Usage, Validation, TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
}; };
use crate::{ use crate::{
ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete, ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,
@ -1505,8 +1504,7 @@ pub async fn run(
messages_api_enabled: bool, messages_api_enabled: bool,
disable_grammar_support: bool, disable_grammar_support: bool,
max_client_batch_size: usize, max_client_batch_size: usize,
disable_usage_stats: bool, usage_stats_level: usage_stats::UsageStatsLevel,
disable_crash_reports: bool,
) -> Result<(), WebServerError> { ) -> Result<(), WebServerError> {
// CORS allowed origins // CORS allowed origins
// map to go inside the option and then map to parse from String to HeaderValue // map to go inside the option and then map to parse from String to HeaderValue
@ -1698,33 +1696,32 @@ pub async fn run(
// Only send usage stats when TGI is run in container and the function returns Some // Only send usage stats when TGI is run in container and the function returns Some
let is_container = matches!(usage_stats::is_container(), Ok(true)); let is_container = matches!(usage_stats::is_container(), Ok(true));
let user_agent = match (usage_stats_level, is_container) {
let user_agent = if !disable_usage_stats && is_container { (usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => {
let reduced_args = usage_stats::Args::new( let reduced_args = usage_stats::Args::new(
config.clone(), config.clone(),
tokenizer_config.tokenizer_class.clone(), tokenizer_config.tokenizer_class.clone(),
max_concurrent_requests, max_concurrent_requests,
max_best_of, max_best_of,
max_stop_sequences, max_stop_sequences,
max_top_n_tokens, max_top_n_tokens,
max_input_tokens, max_input_tokens,
max_total_tokens, max_total_tokens,
// waiting_served_ratio, // waiting_served_ratio,
// max_batch_prefill_tokens, // max_batch_prefill_tokens,
// max_batch_total_tokens, // max_batch_total_tokens,
// max_waiting_tokens, // max_waiting_tokens,
// max_batch_size, // max_batch_size,
revision.clone(), revision.clone(),
validation_workers, validation_workers,
messages_api_enabled, messages_api_enabled,
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
disable_usage_stats, usage_stats_level,
disable_crash_reports, );
); Some(usage_stats::UserAgent::new(reduced_args))
Some(usage_stats::UserAgent::new(reduced_args)) }
} else { _ => None,
None
}; };
if let Some(ref ua) = user_agent { if let Some(ref ua) = user_agent {
@ -1780,21 +1777,18 @@ pub async fn run(
Ok(()) Ok(())
} }
Err(e) => { Err(e) => {
if !disable_crash_reports { let description = match usage_stats_level {
let error_event = usage_stats::UsageStatsEvent::new( usage_stats::UsageStatsLevel::On => Some(e.to_string()),
ua.clone(), usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()),
usage_stats::EventType::Error, _ => None,
Some(e.to_string()), };
); let event = usage_stats::UsageStatsEvent::new(
error_event.send().await; ua.clone(),
} else { usage_stats::EventType::Error,
let unknow_error_event = usage_stats::UsageStatsEvent::new( description,
ua.clone(), );
usage_stats::EventType::Error, event.send().await;
Some("unknow_error".to_string()),
);
unknow_error_event.send().await;
}
Err(e) Err(e)
} }
} }

View File

@ -1,4 +1,5 @@
use crate::config::Config; use crate::config::Config;
use clap::ValueEnum;
use csv::ReaderBuilder; use csv::ReaderBuilder;
use reqwest::header::HeaderMap; use reqwest::header::HeaderMap;
use serde::Serialize; use serde::Serialize;
@ -13,6 +14,13 @@ use uuid::Uuid;
const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi"; const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";
/// Level of anonymous usage-stats collection selected by the caller.
/// Serialized into the telemetry payload alongside the other `Args` fields.
#[derive(Copy, Clone, Debug, Serialize, ValueEnum)]
pub enum UsageStatsLevel {
    /// Usage statistics are collected anonymously.
    On,
    /// Events are sent, but without the error stack trace or error type.
    NoStack,
    /// All collection of usage statistics is disabled.
    Off,
}
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
pub struct UserAgent { pub struct UserAgent {
pub uid: String, pub uid: String,
@ -71,7 +79,7 @@ impl UsageStatsEvent {
#[derive(Debug, Clone, Serialize)] #[derive(Debug, Clone, Serialize)]
pub struct Args { pub struct Args {
model_config: Option<Config>, model_config: Option<Config>,
tokenizer_config: Option<String>, tokenizer_class: Option<String>,
max_concurrent_requests: usize, max_concurrent_requests: usize,
max_best_of: usize, max_best_of: usize,
max_stop_sequences: usize, max_stop_sequences: usize,
@ -88,15 +96,14 @@ pub struct Args {
messages_api_enabled: bool, messages_api_enabled: bool,
disable_grammar_support: bool, disable_grammar_support: bool,
max_client_batch_size: usize, max_client_batch_size: usize,
disable_usage_stats: bool, usage_stats_level: UsageStatsLevel,
disable_crash_reports: bool,
} }
impl Args { impl Args {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn new( pub fn new(
model_config: Option<Config>, model_config: Option<Config>,
tokenizer_config: Option<String>, tokenizer_class: Option<String>,
max_concurrent_requests: usize, max_concurrent_requests: usize,
max_best_of: usize, max_best_of: usize,
max_stop_sequences: usize, max_stop_sequences: usize,
@ -113,12 +120,11 @@ impl Args {
messages_api_enabled: bool, messages_api_enabled: bool,
disable_grammar_support: bool, disable_grammar_support: bool,
max_client_batch_size: usize, max_client_batch_size: usize,
disable_usage_stats: bool, usage_stats_level: UsageStatsLevel,
disable_crash_reports: bool,
) -> Self { ) -> Self {
Self { Self {
model_config, model_config,
tokenizer_config, tokenizer_class,
max_concurrent_requests, max_concurrent_requests,
max_best_of, max_best_of,
max_stop_sequences, max_stop_sequences,
@ -135,8 +141,7 @@ impl Args {
messages_api_enabled, messages_api_enabled,
disable_grammar_support, disable_grammar_support,
max_client_batch_size, max_client_batch_size,
disable_usage_stats, usage_stats_level,
disable_crash_reports,
} }
} }
} }