mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Clarify disabling.
This commit is contained in:
parent
edcbc0890c
commit
6951962ffd
@ -209,7 +209,7 @@ Options:
|
||||
## CUDA_GRAPHS
|
||||
```shell
|
||||
--cuda-graphs <CUDA_GRAPHS>
|
||||
Specify the batch sizes to compute cuda graphs for
|
||||
Specify the batch sizes to compute cuda graphs for. Use "0" to disable
|
||||
|
||||
[env: CUDA_GRAPHS=]
|
||||
[default: 1,2,4,8,16,32,64,96,128]
|
||||
|
@ -284,7 +284,8 @@ struct Args {
|
||||
#[clap(long, env)]
|
||||
max_batch_size: Option<usize>,
|
||||
|
||||
/// Specify the batch sizes to compute cuda graphs for
|
||||
/// Specify the batch sizes to compute cuda graphs for.
|
||||
/// Use "0" to disable.
|
||||
#[clap(
|
||||
long,
|
||||
env,
|
||||
@ -954,7 +955,11 @@ fn spawn_shards(
|
||||
let disable_custom_kernels = args.disable_custom_kernels;
|
||||
let watermark_gamma = args.watermark_gamma;
|
||||
let watermark_delta = args.watermark_delta;
|
||||
let cuda_graphs = args.cuda_graphs.clone();
|
||||
let cuda_graphs: Vec<usize> = args
|
||||
.cuda_graphs
|
||||
.iter()
|
||||
.filter_map(|&c| if c > 0 { Some(c) } else { None })
|
||||
.collect();
|
||||
let cuda_memory_fraction = args.cuda_memory_fraction;
|
||||
let rope_scaling = args.rope_scaling;
|
||||
let rope_factor = args.rope_factor;
|
||||
|
Loading…
Reference in New Issue
Block a user