Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-19 22:02:06 +00:00)
backend(vllm): disable metrics for now
This commit is contained in:
Parent: 003163a2b9
Commit: 5452c1294c
@ -63,6 +63,12 @@ impl TryFrom<&RequestOutput> for InferStreamResponse {
|
|||||||
.unwrap(),
|
.unwrap(),
|
||||||
seed: None,
|
seed: None,
|
||||||
},
|
},
|
||||||
|
// start: STARTUP_INSTANT
|
||||||
|
// .checked_sub(Duration::from_secs_f32(metrics.first_scheduled_time))
|
||||||
|
// .unwrap_or_else(Instant::now),
|
||||||
|
// queued: STARTUP_INSTANT
|
||||||
|
// .checked_sub(Duration::from_secs_f32(metrics.arrival_time))
|
||||||
|
// .unwrap_or_else(Instant::now),
|
||||||
start: Instant::now(),
|
start: Instant::now(),
|
||||||
queued: Instant::now(),
|
queued: Instant::now(),
|
||||||
})
|
})
|
||||||
|
@ -11,7 +11,7 @@ use pyo3::types::PyModule;
|
|||||||
use pyo3::{Py, PyAny, PyErr, PyObject, Python};
|
use pyo3::{Py, PyAny, PyErr, PyObject, Python};
|
||||||
use tokio::time::Instant;
|
use tokio::time::Instant;
|
||||||
|
|
||||||
pub(crate) const STARTUP_INSTANT: Instant = Instant::now();
|
pub(crate) static STARTUP_INSTANT: Instant = Instant::now();
|
||||||
|
|
||||||
static PY_TOKENS_PROMPT_CLASS: GILOnceCell<Py<PyAny>> = GILOnceCell::new();
|
static PY_TOKENS_PROMPT_CLASS: GILOnceCell<Py<PyAny>> = GILOnceCell::new();
|
||||||
static PY_SAMPLING_PARAMS_CLASS: GILOnceCell<Py<PyAny>> = GILOnceCell::new();
|
static PY_SAMPLING_PARAMS_CLASS: GILOnceCell<Py<PyAny>> = GILOnceCell::new();
|
||||||
|
Loading…
Reference in New Issue
Block a user