Clarify how to disable CUDA graphs.

This commit is contained in:
Nicolas Patry 2024-04-04 12:59:29 +00:00
parent edcbc0890c
commit 6951962ffd
2 changed files with 8 additions and 3 deletions

View File

@ -209,7 +209,7 @@ Options:
## CUDA_GRAPHS
```shell
--cuda-graphs <CUDA_GRAPHS>
Specify the batch sizes to compute cuda graphs for
Specify the batch sizes to compute cuda graphs for. Use "0" to disable
[env: CUDA_GRAPHS=]
[default: 1,2,4,8,16,32,64,96,128]

View File

@ -284,7 +284,8 @@ struct Args {
#[clap(long, env)]
max_batch_size: Option<usize>,
/// Specify the batch sizes to compute cuda graphs for
/// Specify the batch sizes to compute cuda graphs for.
/// Use "0" to disable.
#[clap(
long,
env,
@ -954,7 +955,11 @@ fn spawn_shards(
let disable_custom_kernels = args.disable_custom_kernels;
let watermark_gamma = args.watermark_gamma;
let watermark_delta = args.watermark_delta;
let cuda_graphs = args.cuda_graphs.clone();
let cuda_graphs: Vec<usize> = args
.cuda_graphs
.iter()
.filter_map(|&c| if c > 0 { Some(c) } else { None })
.collect();
let cuda_memory_fraction = args.cuda_memory_fraction;
let rope_scaling = args.rope_scaling;
let rope_factor = args.rope_factor;