mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 21:04:53 +00:00
draft of usage stats
This commit is contained in:
parent
245d3de948
commit
8649de3990
40
Cargo.lock
generated
40
Cargo.lock
generated
@ -3424,9 +3424,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_json"
|
name = "serde_json"
|
||||||
version = "1.0.118"
|
version = "1.0.120"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4"
|
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"itoa",
|
"itoa",
|
||||||
"ryu",
|
"ryu",
|
||||||
@ -3672,15 +3672,16 @@ checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sysinfo"
|
name = "sysinfo"
|
||||||
version = "0.30.12"
|
version = "0.30.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae"
|
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cfg-if",
|
"cfg-if",
|
||||||
"core-foundation-sys",
|
"core-foundation-sys",
|
||||||
"libc",
|
"libc",
|
||||||
"ntapi",
|
"ntapi",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"rayon",
|
||||||
"windows",
|
"windows",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -3762,7 +3763,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-benchmark"
|
name = "text-generation-benchmark"
|
||||||
version = "2.1.1-dev0"
|
version = "2.1.2-dev0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"average",
|
"average",
|
||||||
"clap",
|
"clap",
|
||||||
@ -3783,7 +3784,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-client"
|
name = "text-generation-client"
|
||||||
version = "2.1.1-dev0"
|
version = "2.1.2-dev0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-trait",
|
"async-trait",
|
||||||
"base64 0.22.1",
|
"base64 0.22.1",
|
||||||
@ -3801,7 +3802,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-launcher"
|
name = "text-generation-launcher"
|
||||||
version = "2.1.1-dev0"
|
version = "2.1.2-dev0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap",
|
"clap",
|
||||||
"ctrlc",
|
"ctrlc",
|
||||||
@ -3812,6 +3813,7 @@ dependencies = [
|
|||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sysinfo",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
@ -3820,7 +3822,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "text-generation-router"
|
name = "text-generation-router"
|
||||||
version = "2.1.1-dev0"
|
version = "2.1.2-dev0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-stream",
|
"async-stream",
|
||||||
"axum 0.7.5",
|
"axum 0.7.5",
|
||||||
@ -3848,6 +3850,7 @@ dependencies = [
|
|||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"sysinfo",
|
||||||
"text-generation-client",
|
"text-generation-client",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"tokenizers",
|
"tokenizers",
|
||||||
@ -3859,6 +3862,7 @@ dependencies = [
|
|||||||
"tracing-subscriber",
|
"tracing-subscriber",
|
||||||
"utoipa",
|
"utoipa",
|
||||||
"utoipa-swagger-ui",
|
"utoipa-swagger-ui",
|
||||||
|
"uuid",
|
||||||
"vergen",
|
"vergen",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -4530,9 +4534,25 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "uuid"
|
name = "uuid"
|
||||||
version = "1.9.1"
|
version = "1.10.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439"
|
checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314"
|
||||||
|
dependencies = [
|
||||||
|
"getrandom",
|
||||||
|
"rand",
|
||||||
|
"uuid-macro-internal",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "uuid-macro-internal"
|
||||||
|
version = "1.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ee1cd046f83ea2c4e920d6ee9f7c3537ef928d75dce5d84a87c2c5d6b3999a3a"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.68",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "v_frame"
|
name = "v_frame"
|
||||||
|
63
docs/source/usage_statistics.md
Normal file
63
docs/source/usage_statistics.md
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
|
||||||
|
# Collection of Usage Statistics
|
||||||
|
|
||||||
|
Text Generation Inference collects anonymous usage statistics to help us improve the service. The collected data is used to improve TGI and to understand what causes failures. The data is collected transparently and any sensitive information is omitted.
|
||||||
|
|
||||||
|
Data is sent twice: 1) on server startup and 2) when the server stops. Also, usage statistics are only enabled when TGI is running in docker, to avoid collecting data when TGI runs directly on the host machine.
|
||||||
|
|
||||||
|
## What data is collected
|
||||||
|
|
||||||
|
The code that collects the data is available [here](https://github.com/huggingface/text-generation-inference/blob/main/router/src/usage_stats.rs).
|
||||||
|
As of release 2.1.2 this is an example of the data collected:
|
||||||
|
|
||||||
|
- From the TGI configuration:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"event_type": "start",
|
||||||
|
"disable_grammar_support": false,
|
||||||
|
"json_output": false,
|
||||||
|
"max_batch_prefill_tokens": 4096,
|
||||||
|
"max_batch_size": null,
|
||||||
|
"max_batch_total_tokens": null,
|
||||||
|
"max_best_of": 2,
|
||||||
|
"max_client_batch_size": 4,
|
||||||
|
"max_concurrent_requests": 128,
|
||||||
|
"max_input_tokens": 1024,
|
||||||
|
"max_stop_sequences": 4,
|
||||||
|
"max_top_n_tokens": 5,
|
||||||
|
"max_total_tokens": 2048,
|
||||||
|
"max_waiting_tokens": 20,
|
||||||
|
"messages_api_enabled": false,
|
||||||
|
"model_config": {
|
||||||
|
"model_type": "bloom"
|
||||||
|
},
|
||||||
|
"ngrok": false,
|
||||||
|
"revision": null,
|
||||||
|
"tokenizer_config": {
|
||||||
|
"add_bos_token": null,
|
||||||
|
"add_eos_token": null,
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": null,
|
||||||
|
"completion_template": null,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"tokenizer_class": "BloomTokenizerFast"
|
||||||
|
},
|
||||||
|
"validation_workers": 2,
|
||||||
|
"waiting_served_ratio": 1.2,
|
||||||
|
"docker_label": "latest",
|
||||||
|
"git_sha": "245d3de94877d4910ea7876f6bd19b4d7d4a47d9",
|
||||||
|
"nvidia_env": "N/A",
|
||||||
|
"system_env": {
|
||||||
|
"architecture": "aarch64",
|
||||||
|
"cpu_count": 16,
|
||||||
|
"cpu_type": "Apple M3",
|
||||||
|
"platform": "macos-unix-aarch64",
|
||||||
|
"total_memory": 25769803767
|
||||||
|
},
|
||||||
|
"xpu_env": "N/A"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## How to opt-out
|
||||||
|
|
||||||
|
You can easily opt out by passing the `--disable-usage-stats` flag to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports`, which disables sending of crash reports but still allows anonymous usage statistics.
|
@ -14,6 +14,7 @@ nix = { version = "0.28.0", features = ["signal"] }
|
|||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
serde = { version = "1.0.188", features = ["derive"] }
|
serde = { version = "1.0.188", features = ["derive"] }
|
||||||
serde_json = "1.0.107"
|
serde_json = "1.0.107"
|
||||||
|
sysinfo = "0.30.12"
|
||||||
thiserror = "1.0.59"
|
thiserror = "1.0.59"
|
||||||
tracing = "0.1.37"
|
tracing = "0.1.37"
|
||||||
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
|
||||||
|
@ -457,6 +457,14 @@ struct Args {
|
|||||||
/// startup that will be available to callers via the `adapter_id` field in a request.
|
/// startup that will be available to callers via the `adapter_id` field in a request.
|
||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
lora_adapters: Option<String>,
|
lora_adapters: Option<String>,
|
||||||
|
|
||||||
|
/// Disable sending of all usage statistics
|
||||||
|
#[clap(default_value = "false", long, env)]
|
||||||
|
disable_usage_stats: bool,
|
||||||
|
|
||||||
|
/// Disable sending of crash reports, but allow anonymous usage statistics
|
||||||
|
#[clap(default_value = "false", long, env)]
|
||||||
|
disable_crash_reports: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@ -1199,8 +1207,20 @@ fn spawn_webserver(
|
|||||||
format!("{}-0", args.shard_uds_path),
|
format!("{}-0", args.shard_uds_path),
|
||||||
"--tokenizer-name".to_string(),
|
"--tokenizer-name".to_string(),
|
||||||
args.model_id,
|
args.model_id,
|
||||||
|
"--disable-usage-stats".to_string(),
|
||||||
|
args.disable_usage_stats.to_string(),
|
||||||
|
"--disable-crash-reports".to_string(),
|
||||||
|
args.disable_crash_reports.to_string(),
|
||||||
];
|
];
|
||||||
|
|
||||||
|
// Pass usage stats flags to router
|
||||||
|
if args.disable_usage_stats {
|
||||||
|
router_args.push("--usage-stats".to_string());
|
||||||
|
}
|
||||||
|
if args.disable_crash_reports {
|
||||||
|
router_args.push("--crash-reports".to_string());
|
||||||
|
}
|
||||||
|
|
||||||
// Grammar support
|
// Grammar support
|
||||||
if args.disable_grammar_support {
|
if args.disable_grammar_support {
|
||||||
router_args.push("--disable-grammar-support".to_string());
|
router_args.push("--disable-grammar-support".to_string());
|
||||||
|
@ -52,6 +52,9 @@ regex = "1.10.3"
|
|||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
image = "0.25.1"
|
image = "0.25.1"
|
||||||
base64 = { workspace = true }
|
base64 = { workspace = true }
|
||||||
|
sysinfo = "0.30.13"
|
||||||
|
uuid = { version = "1.9.1", default-features = false, features = ["v4", "fast-rng", "macro-diagnostics"] }
|
||||||
|
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
|
vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
|
||||||
|
@ -7,6 +7,8 @@ mod validation;
|
|||||||
#[cfg(feature = "kserve")]
|
#[cfg(feature = "kserve")]
|
||||||
mod kserve;
|
mod kserve;
|
||||||
|
|
||||||
|
pub mod usage_stats;
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tracing::warn;
|
use tracing::warn;
|
||||||
use utoipa::ToSchema;
|
use utoipa::ToSchema;
|
||||||
@ -40,13 +42,13 @@ pub struct HubModelInfo {
|
|||||||
pub pipeline_tag: Option<String>,
|
pub pipeline_tag: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||||
pub struct ChatTemplate {
|
pub struct ChatTemplate {
|
||||||
name: String,
|
name: String,
|
||||||
template: String,
|
template: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||||
#[serde(untagged)]
|
#[serde(untagged)]
|
||||||
pub enum ChatTemplateVersions {
|
pub enum ChatTemplateVersions {
|
||||||
Single(String),
|
Single(String),
|
||||||
@ -55,7 +57,7 @@ pub enum ChatTemplateVersions {
|
|||||||
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Default)]
|
#[derive(Debug, Clone, Serialize ,Deserialize, Default)]
|
||||||
pub struct HubTokenizerConfig {
|
pub struct HubTokenizerConfig {
|
||||||
pub chat_template: Option<ChatTemplateVersions>,
|
pub chat_template: Option<ChatTemplateVersions>,
|
||||||
pub completion_template: Option<String>,
|
pub completion_template: Option<String>,
|
||||||
|
@ -23,6 +23,7 @@ use tower_http::cors::AllowOrigin;
|
|||||||
use tracing_subscriber::layer::SubscriberExt;
|
use tracing_subscriber::layer::SubscriberExt;
|
||||||
use tracing_subscriber::util::SubscriberInitExt;
|
use tracing_subscriber::util::SubscriberInitExt;
|
||||||
use tracing_subscriber::{filter::LevelFilter, EnvFilter, Layer};
|
use tracing_subscriber::{filter::LevelFilter, EnvFilter, Layer};
|
||||||
|
use text_generation_router::usage_stats;
|
||||||
|
|
||||||
/// App Configuration
|
/// App Configuration
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
@ -87,6 +88,10 @@ struct Args {
|
|||||||
disable_grammar_support: bool,
|
disable_grammar_support: bool,
|
||||||
#[clap(default_value = "4", long, env)]
|
#[clap(default_value = "4", long, env)]
|
||||||
max_client_batch_size: usize,
|
max_client_batch_size: usize,
|
||||||
|
#[clap(long, env, default_value_t)]
|
||||||
|
disable_usage_stats: bool,
|
||||||
|
#[clap(long, env, default_value_t)]
|
||||||
|
disable_crash_reports: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Subcommand)]
|
#[derive(Debug, Subcommand)]
|
||||||
@ -128,6 +133,8 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
messages_api_enabled,
|
messages_api_enabled,
|
||||||
disable_grammar_support,
|
disable_grammar_support,
|
||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
|
disable_usage_stats,
|
||||||
|
disable_crash_reports,
|
||||||
command,
|
command,
|
||||||
} = args;
|
} = args;
|
||||||
|
|
||||||
@ -374,8 +381,49 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Only send usage stats when TGI is run in docker
|
||||||
|
let is_docker = option_env!("DOCKER_LABEL").is_some();
|
||||||
|
|
||||||
|
let user_agent = if !disable_usage_stats && is_docker {
|
||||||
|
let reducded_args = usage_stats::Args::new(
|
||||||
|
config.clone(),
|
||||||
|
tokenizer_config.clone(),
|
||||||
|
max_concurrent_requests,
|
||||||
|
max_best_of,
|
||||||
|
max_stop_sequences,
|
||||||
|
max_top_n_tokens,
|
||||||
|
max_input_tokens,
|
||||||
|
max_total_tokens,
|
||||||
|
waiting_served_ratio,
|
||||||
|
max_batch_prefill_tokens,
|
||||||
|
max_batch_total_tokens,
|
||||||
|
max_waiting_tokens,
|
||||||
|
max_batch_size,
|
||||||
|
revision,
|
||||||
|
validation_workers,
|
||||||
|
json_output,
|
||||||
|
ngrok,
|
||||||
|
messages_api_enabled,
|
||||||
|
disable_grammar_support,
|
||||||
|
max_client_batch_size,
|
||||||
|
disable_usage_stats,
|
||||||
|
disable_crash_reports,
|
||||||
|
);
|
||||||
|
Some(usage_stats::UserAgent::new(reducded_args))
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(ref ua) = user_agent {
|
||||||
|
let start_event = usage_stats::UsageStatsEvent::new(ua.clone(), usage_stats::EventType::Start);
|
||||||
|
tokio::spawn(async move {
|
||||||
|
start_event.send().await;
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
// Run server
|
// Run server
|
||||||
server::run(
|
let result = server::run(
|
||||||
master_shard_uds_path,
|
master_shard_uds_path,
|
||||||
model_info,
|
model_info,
|
||||||
compat_return_full_text,
|
compat_return_full_text,
|
||||||
@ -406,9 +454,27 @@ async fn main() -> Result<(), RouterError> {
|
|||||||
max_client_batch_size,
|
max_client_batch_size,
|
||||||
print_schema_command,
|
print_schema_command,
|
||||||
)
|
)
|
||||||
.await?;
|
.await;
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(_) => {
|
||||||
|
if let Some(ref ua) = user_agent {
|
||||||
|
let stop_event = usage_stats::UsageStatsEvent::new(ua.clone(), usage_stats::EventType::Stop);
|
||||||
|
stop_event.send().await;
|
||||||
|
};
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Err(e) => {
|
||||||
|
if let Some(ref ua) = user_agent {
|
||||||
|
if !disable_crash_reports {
|
||||||
|
let error_event = usage_stats::UsageStatsEvent::new(ua.clone(), usage_stats::EventType::Error(e.to_string()));
|
||||||
|
error_event.send().await;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Err(RouterError::WebServer(e))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Init logging using env variables LOG_LEVEL and LOG_FORMAT:
|
/// Init logging using env variables LOG_LEVEL and LOG_FORMAT:
|
||||||
/// - otlp_endpoint is an optional URL to an Open Telemetry collector
|
/// - otlp_endpoint is an optional URL to an Open Telemetry collector
|
||||||
|
233
router/src/usage_stats.rs
Normal file
233
router/src/usage_stats.rs
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
use crate::{config::Config, HubTokenizerConfig};
|
||||||
|
use reqwest::header::HeaderMap;
|
||||||
|
use serde::Serialize;
|
||||||
|
use uuid::Uuid;
|
||||||
|
use std::{fmt, process::Command, time::Duration};
|
||||||
|
|
||||||
|
const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct UserAgent {
|
||||||
|
pub uid: String,
|
||||||
|
pub args: Args,
|
||||||
|
pub env: Env,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UserAgent {
|
||||||
|
pub fn new(reduced_args: Args) -> Self {
|
||||||
|
Self {
|
||||||
|
uid: Uuid::new_v4().to_string(),
|
||||||
|
args: reduced_args,
|
||||||
|
env: Env::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug)]
|
||||||
|
pub enum EventType {
|
||||||
|
Start,
|
||||||
|
Stop,
|
||||||
|
Error(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize)]
|
||||||
|
pub struct UsageStatsEvent {
|
||||||
|
user_agent: UserAgent,
|
||||||
|
event_type: EventType,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl UsageStatsEvent {
|
||||||
|
pub fn new(user_agent: UserAgent, event_type: EventType) -> Self {
|
||||||
|
Self {
|
||||||
|
user_agent,
|
||||||
|
event_type,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub async fn send(&self) {
|
||||||
|
let mut headers = HeaderMap::new();
|
||||||
|
headers.insert("Content-Type", "application/json".parse().unwrap());
|
||||||
|
let body = serde_json::to_string(&self).unwrap();
|
||||||
|
let client = reqwest::Client::new();
|
||||||
|
let _ = client.post(TELEMETRY_URL)
|
||||||
|
.body(body)
|
||||||
|
.timeout(Duration::from_secs(5))
|
||||||
|
.send()
|
||||||
|
.await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize)]
|
||||||
|
pub struct Args {
|
||||||
|
model_config: Option<Config>,
|
||||||
|
tokenizer_config: HubTokenizerConfig,
|
||||||
|
max_concurrent_requests: usize,
|
||||||
|
max_best_of: usize,
|
||||||
|
max_stop_sequences: usize,
|
||||||
|
max_top_n_tokens: u32,
|
||||||
|
max_input_tokens: usize,
|
||||||
|
max_total_tokens: usize,
|
||||||
|
waiting_served_ratio: f32,
|
||||||
|
max_batch_prefill_tokens: u32,
|
||||||
|
max_batch_total_tokens: Option<u32>,
|
||||||
|
max_waiting_tokens: usize,
|
||||||
|
max_batch_size: Option<usize>,
|
||||||
|
revision: Option<String>,
|
||||||
|
validation_workers: usize,
|
||||||
|
json_output: bool,
|
||||||
|
ngrok: bool,
|
||||||
|
messages_api_enabled: bool,
|
||||||
|
disable_grammar_support: bool,
|
||||||
|
max_client_batch_size: usize,
|
||||||
|
disable_usage_stats: bool,
|
||||||
|
disable_crash_reports: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Args {
|
||||||
|
pub fn new(
|
||||||
|
model_config: Option<Config>,
|
||||||
|
tokenizer_config: HubTokenizerConfig,
|
||||||
|
max_concurrent_requests: usize,
|
||||||
|
max_best_of: usize,
|
||||||
|
max_stop_sequences: usize,
|
||||||
|
max_top_n_tokens: u32,
|
||||||
|
max_input_tokens: usize,
|
||||||
|
max_total_tokens: usize,
|
||||||
|
waiting_served_ratio: f32,
|
||||||
|
max_batch_prefill_tokens: u32,
|
||||||
|
max_batch_total_tokens: Option<u32>,
|
||||||
|
max_waiting_tokens: usize,
|
||||||
|
max_batch_size: Option<usize>,
|
||||||
|
revision: Option<String>,
|
||||||
|
validation_workers: usize,
|
||||||
|
json_output: bool,
|
||||||
|
ngrok: bool,
|
||||||
|
messages_api_enabled: bool,
|
||||||
|
disable_grammar_support: bool,
|
||||||
|
max_client_batch_size: usize,
|
||||||
|
disable_usage_stats: bool,
|
||||||
|
disable_crash_reports: bool,
|
||||||
|
) -> Self {
|
||||||
|
Self {
|
||||||
|
model_config,
|
||||||
|
tokenizer_config,
|
||||||
|
max_concurrent_requests,
|
||||||
|
max_best_of,
|
||||||
|
max_stop_sequences,
|
||||||
|
max_top_n_tokens,
|
||||||
|
max_input_tokens,
|
||||||
|
max_total_tokens,
|
||||||
|
waiting_served_ratio,
|
||||||
|
max_batch_prefill_tokens,
|
||||||
|
max_batch_total_tokens,
|
||||||
|
max_waiting_tokens,
|
||||||
|
max_batch_size,
|
||||||
|
revision,
|
||||||
|
validation_workers,
|
||||||
|
json_output,
|
||||||
|
ngrok,
|
||||||
|
messages_api_enabled,
|
||||||
|
disable_grammar_support,
|
||||||
|
max_client_batch_size,
|
||||||
|
disable_usage_stats,
|
||||||
|
disable_crash_reports,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This is more or less a copy of the code from the `text-generation-launcher` crate to avoid a dependency
|
||||||
|
#[derive(Serialize, Debug, Clone)]
|
||||||
|
pub struct Env {
|
||||||
|
git_sha: &'static str,
|
||||||
|
docker_label: &'static str,
|
||||||
|
nvidia_env: String,
|
||||||
|
xpu_env: String,
|
||||||
|
system_env: SystemInfo,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Serialize, Debug, Clone)]
|
||||||
|
pub struct SystemInfo {
|
||||||
|
cpu_count: usize,
|
||||||
|
cpu_type: String,
|
||||||
|
total_memory: u64,
|
||||||
|
architecture: String,
|
||||||
|
platform: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SystemInfo {
|
||||||
|
fn new() -> Self {
|
||||||
|
let mut system = sysinfo::System::new_all();
|
||||||
|
system.refresh_all();
|
||||||
|
|
||||||
|
let cpu_count = system.cpus().len();
|
||||||
|
let cpu_type = system.cpus()[0].brand().to_string();
|
||||||
|
let total_memory = system.total_memory();
|
||||||
|
let architecture = std::env::consts::ARCH.to_string();
|
||||||
|
let platform = format!("{}-{}-{}",
|
||||||
|
std::env::consts::OS,
|
||||||
|
std::env::consts::FAMILY,
|
||||||
|
std::env::consts::ARCH
|
||||||
|
);
|
||||||
|
Self {
|
||||||
|
cpu_count,
|
||||||
|
cpu_type,
|
||||||
|
total_memory,
|
||||||
|
architecture,
|
||||||
|
platform,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for SystemInfo {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
writeln!(f, "CPU Count: {}", self.cpu_count)?;
|
||||||
|
writeln!(f, "CPU Type: {}", self.cpu_type)?;
|
||||||
|
writeln!(f, "Total Memory: {}", self.total_memory)?;
|
||||||
|
writeln!(f, "Architecture: {}", self.architecture)?;
|
||||||
|
writeln!(f, "Platform: {}", self.platform)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Env {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
let nvidia_env = nvidia_smi();
|
||||||
|
let xpu_env = xpu_smi();
|
||||||
|
let system_env = SystemInfo::new();
|
||||||
|
|
||||||
|
Self {
|
||||||
|
system_env,
|
||||||
|
nvidia_env: nvidia_env.unwrap_or("N/A".to_string()),
|
||||||
|
xpu_env: xpu_env.unwrap_or("N/A".to_string()),
|
||||||
|
git_sha: option_env!("VERGEN_GIT_SHA").unwrap_or("N/A"),
|
||||||
|
docker_label: option_env!("DOCKER_LABEL").unwrap_or("N/A"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Env {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
writeln!(f, "Runtime environment:")?;
|
||||||
|
writeln!(f, "Commit sha: {}", self.git_sha)?;
|
||||||
|
writeln!(f, "Docker label: {}", self.docker_label)?;
|
||||||
|
writeln!(f, "nvidia-smi:\n{}", self.nvidia_env)?;
|
||||||
|
write!(f, "xpu-smi:\n{}\n", self.xpu_env)?;
|
||||||
|
write!(f, "System:\n{}", self.system_env)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs `nvidia-smi` and returns its standard output.
///
/// Returns `None` when the command cannot be executed or its output is not
/// valid UTF-8. Every newline is followed by a space and the result is trimmed.
fn nvidia_smi() -> Option<String> {
    let stdout = Command::new("nvidia-smi").output().ok()?.stdout;
    String::from_utf8(stdout)
        .ok()
        .map(|text| text.replace('\n', "\n ").trim().to_string())
}
|
||||||
|
|
||||||
|
/// Runs `xpu-smi discovery` and returns its standard output.
///
/// Returns `None` when the command cannot be executed or its output is not
/// valid UTF-8. Every newline is followed by a space and the result is trimmed.
fn xpu_smi() -> Option<String> {
    let stdout = Command::new("xpu-smi")
        .arg("discovery")
        .output()
        .ok()?
        .stdout;
    String::from_utf8(stdout)
        .ok()
        .map(|text| text.replace('\n', "\n ").trim().to_string())
}
|
Loading…
Reference in New Issue
Block a user