mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
fix megablocks install
This commit is contained in:
parent
d0aff8e5e2
commit
008733313c
@ -157,7 +157,7 @@ RUN make build-vllm-cuda
|
|||||||
# Build megablocks
|
# Build megablocks
|
||||||
FROM kernel-builder as megablocks-builder
|
FROM kernel-builder as megablocks-builder
|
||||||
|
|
||||||
RUN pip install git+https://github.com/OlivierDehaene/megablocks@16c5350f7b313a5ab52ab109feb45f159f1e5d3d
|
RUN pip install git+https://github.com/OlivierDehaene/megablocks@3de516d9b774ea5dd1b79c68e2c475880f3983e7
|
||||||
|
|
||||||
# Text Generation Inference base image
|
# Text Generation Inference base image
|
||||||
FROM nvidia/cuda:12.1.0-base-ubuntu20.04 as base
|
FROM nvidia/cuda:12.1.0-base-ubuntu20.04 as base
|
||||||
|
@ -629,6 +629,9 @@ pub async fn run(
|
|||||||
// Batch size buckets
|
// Batch size buckets
|
||||||
let batch_size_matcher = Matcher::Full(String::from("tgi_batch_next_size"));
|
let batch_size_matcher = Matcher::Full(String::from("tgi_batch_next_size"));
|
||||||
let batch_size_buckets: Vec<f64> = (0..1024).map(|x| (x + 1) as f64).collect();
|
let batch_size_buckets: Vec<f64> = (0..1024).map(|x| (x + 1) as f64).collect();
|
||||||
|
// Speculated tokens buckets
|
||||||
|
let skipped_matcher = Matcher::Full(String::from("tgi_request_skipped_tokens"));
|
||||||
|
let skipped_buckets: Vec<f64> = (0..shard_info.speculate + 1).map(|x| x as f64).collect();
|
||||||
|
|
||||||
// Prometheus handler
|
// Prometheus handler
|
||||||
let builder = PrometheusBuilder::new()
|
let builder = PrometheusBuilder::new()
|
||||||
@ -641,6 +644,8 @@ pub async fn run(
|
|||||||
.set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets)
|
.set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)
|
.set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)
|
||||||
|
.unwrap()
|
||||||
|
.set_buckets_for_metric(skipped_matcher, &skipped_buckets)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let prom_handle = builder
|
let prom_handle = builder
|
||||||
.install_recorder()
|
.install_recorder()
|
||||||
|
@ -17,7 +17,7 @@ gen-server:
|
|||||||
touch text_generation_server/pb/__init__.py
|
touch text_generation_server/pb/__init__.py
|
||||||
|
|
||||||
install-megablocks:
|
install-megablocks:
|
||||||
pip install git+https://github.com/OlivierDehaene/megablocks@16c5350f7b313a5ab52ab109feb45f159f1e5d3d
|
pip install git+https://github.com/OlivierDehaene/megablocks@3de516d9b774ea5dd1b79c68e2c475880f3983e7
|
||||||
|
|
||||||
install: gen-server
|
install: gen-server
|
||||||
pip install pip --upgrade
|
pip install pip --upgrade
|
||||||
|
Loading…
Reference in New Issue
Block a user