fmt

2025-09-10 11:54:52 +00:00 · 2023-07-24 13:59:10 +02:00 · 2023-07-24 13:59:10 +02:00 · 74c87f5888
commit 74c87f5888
parent a6057c4076
2 changed files with 3 additions and 2 deletions
--- a/server/text_generation_server/server.py
+++ b/server/text_generation_server/server.py
@@ -146,8 +146,10 @@ def serve(
                # For which we have the finale shapes only after the model has loaded
                # This will allocate those buffers.
                from text_generation_server.utils.gptq.exllama import (
-                create_exllama_buffers, set_device
+                    create_exllama_buffers,
+                    set_device,
                )
+
                set_device(model.device)
                create_exllama_buffers()
            except ImportError:
--- a/server/text_generation_server/utils/gptq/exllama.py
+++ b/server/text_generation_server/utils/gptq/exllama.py
@@ -12,7 +12,6 @@ def ext_make_q4(qweight, qzeros, scales, g_idx, device):
    )


-
 def ext_q4_matmul(x, q4, q4_width):
    """Matrix multiplication, returns x @ q4"""
    outshape = x.shape[:-1] + (q4_width,)