mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Clarify disabling.
This commit is contained in:
parent
edcbc0890c
commit
6951962ffd
@ -209,7 +209,7 @@ Options:
|
|||||||
## CUDA_GRAPHS
|
## CUDA_GRAPHS
|
||||||
```shell
|
```shell
|
||||||
--cuda-graphs <CUDA_GRAPHS>
|
--cuda-graphs <CUDA_GRAPHS>
|
||||||
Specify the batch sizes to compute cuda graphs for
|
Specify the batch sizes to compute cuda graphs for. Use "0" to disable
|
||||||
|
|
||||||
[env: CUDA_GRAPHS=]
|
[env: CUDA_GRAPHS=]
|
||||||
[default: 1,2,4,8,16,32,64,96,128]
|
[default: 1,2,4,8,16,32,64,96,128]
|
||||||
|
@ -284,7 +284,8 @@ struct Args {
|
|||||||
#[clap(long, env)]
|
#[clap(long, env)]
|
||||||
max_batch_size: Option<usize>,
|
max_batch_size: Option<usize>,
|
||||||
|
|
||||||
/// Specify the batch sizes to compute cuda graphs for
|
/// Specify the batch sizes to compute cuda graphs for.
|
||||||
|
/// Use "0" to disable.
|
||||||
#[clap(
|
#[clap(
|
||||||
long,
|
long,
|
||||||
env,
|
env,
|
||||||
@ -954,7 +955,11 @@ fn spawn_shards(
|
|||||||
let disable_custom_kernels = args.disable_custom_kernels;
|
let disable_custom_kernels = args.disable_custom_kernels;
|
||||||
let watermark_gamma = args.watermark_gamma;
|
let watermark_gamma = args.watermark_gamma;
|
||||||
let watermark_delta = args.watermark_delta;
|
let watermark_delta = args.watermark_delta;
|
||||||
let cuda_graphs = args.cuda_graphs.clone();
|
let cuda_graphs: Vec<usize> = args
|
||||||
|
.cuda_graphs
|
||||||
|
.iter()
|
||||||
|
.filter_map(|&c| if c > 0 { Some(c) } else { None })
|
||||||
|
.collect();
|
||||||
let cuda_memory_fraction = args.cuda_memory_fraction;
|
let cuda_memory_fraction = args.cuda_memory_fraction;
|
||||||
let rope_scaling = args.rope_scaling;
|
let rope_scaling = args.rope_scaling;
|
||||||
let rope_factor = args.rope_factor;
|
let rope_factor = args.rope_factor;
|
||||||
|
Loading…
Reference in New Issue
Block a user