fix megablocks install

This commit is contained in:
OlivierDehaene 2023-12-11 13:34:51 +01:00
parent d0aff8e5e2
commit 008733313c
3 changed files with 7 additions and 2 deletions

View File

@ -157,7 +157,7 @@ RUN make build-vllm-cuda
# Build megablocks # Build megablocks
FROM kernel-builder as megablocks-builder FROM kernel-builder as megablocks-builder
RUN pip install git+https://github.com/OlivierDehaene/megablocks@16c5350f7b313a5ab52ab109feb45f159f1e5d3d RUN pip install git+https://github.com/OlivierDehaene/megablocks@3de516d9b774ea5dd1b79c68e2c475880f3983e7
# Text Generation Inference base image # Text Generation Inference base image
FROM nvidia/cuda:12.1.0-base-ubuntu20.04 as base FROM nvidia/cuda:12.1.0-base-ubuntu20.04 as base

View File

@ -629,6 +629,9 @@ pub async fn run(
// Batch size buckets // Batch size buckets
let batch_size_matcher = Matcher::Full(String::from("tgi_batch_next_size")); let batch_size_matcher = Matcher::Full(String::from("tgi_batch_next_size"));
let batch_size_buckets: Vec<f64> = (0..1024).map(|x| (x + 1) as f64).collect(); let batch_size_buckets: Vec<f64> = (0..1024).map(|x| (x + 1) as f64).collect();
// Skipped (speculated) tokens buckets: one bucket per count from 0 to `speculate` inclusive
let skipped_matcher = Matcher::Full(String::from("tgi_request_skipped_tokens"));
let skipped_buckets: Vec<f64> = (0..shard_info.speculate + 1).map(|x| x as f64).collect();
// Prometheus handler // Prometheus handler
let builder = PrometheusBuilder::new() let builder = PrometheusBuilder::new()
@ -641,6 +644,8 @@ pub async fn run(
.set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets) .set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets)
.unwrap() .unwrap()
.set_buckets_for_metric(batch_size_matcher, &batch_size_buckets) .set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)
.unwrap()
.set_buckets_for_metric(skipped_matcher, &skipped_buckets)
.unwrap(); .unwrap();
let prom_handle = builder let prom_handle = builder
.install_recorder() .install_recorder()

View File

@ -17,7 +17,7 @@ gen-server:
touch text_generation_server/pb/__init__.py touch text_generation_server/pb/__init__.py
install-megablocks: install-megablocks:
pip install git+https://github.com/OlivierDehaene/megablocks@16c5350f7b313a5ab52ab109feb45f159f1e5d3d pip install git+https://github.com/OlivierDehaene/megablocks@3de516d9b774ea5dd1b79c68e2c475880f3983e7
install: gen-server install: gen-server
pip install pip --upgrade pip install pip --upgrade