From 3643d1cd9ee380d12008af380fc215e484cd3521 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Thu, 15 Aug 2024 09:17:06 +0200
Subject: [PATCH] Removing serde override.

---
 launcher/src/main.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 58abb306..bb1d881f 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -89,10 +89,10 @@ enum Quantization {
     Bitsandbytes,
     /// Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x,
     /// but it is known that the model will be much slower to run than the native f16.
-    BitsandbytesNF4,
+    BitsandbytesNf4,
     /// Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better
     /// perplexity performance for you model
-    BitsandbytesFP4,
+    BitsandbytesFp4,
     /// [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above
     /// This dtype has native ops should be the fastest if available.
     /// This is currently not the fastest because of local unpacking + padding to satisfy matrix
@@ -109,10 +109,10 @@ impl std::fmt::Display for Quantization {
             Quantization::Bitsandbytes => {
                 write!(f, "bitsandbytes")
             }
-            Quantization::BitsandbytesNF4 => {
+            Quantization::BitsandbytesNf4 => {
                 write!(f, "bitsandbytes-nf4")
             }
-            Quantization::BitsandbytesFP4 => {
+            Quantization::BitsandbytesFp4 => {
                 write!(f, "bitsandbytes-fp4")
             }
             Quantization::Exl2 => {
@@ -1566,8 +1566,8 @@ fn main() -> Result<(), LauncherError> {
             None,
             Some(
                 Quantization::Bitsandbytes
-                | Quantization::BitsandbytesNF4
-                | Quantization::BitsandbytesFP4,
+                | Quantization::BitsandbytesNf4
+                | Quantization::BitsandbytesFp4,
             ),
         ) => {
            tracing::info!("Bitsandbytes doesn't work with cuda graphs, deactivating them");
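
Note on the motivation (editor's sketch, not part of the patch): serde's
rename_all = "kebab-case" starts a new word before every uppercase letter in
a PascalCase variant name, so a variant spelled BitsandbytesNF4 would derive
to "bitsandbytes-n-f4" and need an explicit per-variant
#[serde(rename = "bitsandbytes-nf4")] override; spelling it BitsandbytesNf4
makes the derived name come out as "bitsandbytes-nf4" directly, which is one
plausible reading of why the rename lets the override go. A minimal,
self-contained sketch under that assumption (the enum's real derive
attributes are not visible in this diff; assumes serde and serde_json as
dependencies):

    use serde::Serialize;

    #[derive(Serialize)]
    #[serde(rename_all = "kebab-case")]
    enum Quantization {
        // The old spelling BitsandbytesNF4 would derive to
        // "bitsandbytes-n-f4" (serde splits before every uppercase
        // letter), forcing an explicit #[serde(rename = ...)] override.
        // The new spelling derives to "bitsandbytes-nf4" with no override.
        BitsandbytesNf4,
        BitsandbytesFp4,
    }

    fn main() {
        // Prints the JSON string "bitsandbytes-nf4" (quotes included).
        println!(
            "{}",
            serde_json::to_string(&Quantization::BitsandbytesNf4).unwrap()
        );
    }

The hand-written Display impl in the second hunk keeps the user-facing CLI
strings ("bitsandbytes-nf4", "bitsandbytes-fp4") unchanged, so the variant
rename stays internal to the code.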