From eb40f8ccdad5d22f3d0b77680c3848901971168c Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 1 Feb 2024 15:30:02 +0000
Subject: [PATCH] Marking the flag as really not the fastest and BETA.

---
 launcher/src/main.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 5fee3e91..9c8abf8f 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -47,8 +47,11 @@ enum Quantization {
     /// Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better
     /// perplexity performance for you model
     BitsandbytesFP4,
+    /// [BETA]
     /// [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above
     /// This dtype has native ops should be the fastest if available.
+    /// In practice it is currently not the fastest option, because of local unpacking and
+    /// padding needed to satisfy matrix multiplication limitations.
     Fp8,
 }