Clarify disabling.

2025-09-11 12:24:53 +00:00 · 2024-04-04 12:59:29 +00:00 · 2024-04-04 12:59:29 +00:00 · 6951962ffd
commit 6951962ffd
parent edcbc0890c
2 changed files with 8 additions and 3 deletions
--- a/docs/source/basic_tutorials/launcher.md
+++ b/docs/source/basic_tutorials/launcher.md
@@ -209,7 +209,7 @@ Options:
 ## CUDA_GRAPHS
 ```shell
      --cuda-graphs <CUDA_GRAPHS>
-          Specify the batch sizes to compute cuda graphs for
+          Specify the batch sizes to compute cuda graphs for. Use "0" to disable
          [env: CUDA_GRAPHS=]
          [default: 1,2,4,8,16,32,64,96,128]
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -284,7 +284,8 @@ struct Args {
    #[clap(long, env)]
    max_batch_size: Option<usize>,
-    /// Specify the batch sizes to compute cuda graphs for
+    /// Specify the batch sizes to compute cuda graphs for.
+    /// Use "0" to disable.
    #[clap(
        long,
        env,
@@ -954,7 +955,11 @@ fn spawn_shards(
        let disable_custom_kernels = args.disable_custom_kernels;
        let watermark_gamma = args.watermark_gamma;
        let watermark_delta = args.watermark_delta;
-        let cuda_graphs = args.cuda_graphs.clone();
+        let cuda_graphs: Vec<usize> = args
+            .cuda_graphs
+            .iter()
+            .filter_map(|&c| if c > 0 { Some(c) } else { None })
+            .collect();
        let cuda_memory_fraction = args.cuda_memory_fraction;
        let rope_scaling = args.rope_scaling;
        let rope_factor = args.rope_factor;