Apply suggestions from code review

Co-authored-by: OlivierDehaene <olivier@huggingface.co>
Nicolas Patry 2023-06-07 14:59:29 +02:00 committed by GitHub
parent c6ac50e42b
commit 6ddcd1582c
3 changed files with 0 additions and 8 deletions

Dockerfile

@@ -105,7 +105,6 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN pip install ninja
 RUN python setup.py build
 
 # Text Generation Inference base image
@@ -137,7 +136,6 @@ COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy build artifacts from transformers builder
-COPY --from=custom-kernels-builder /usr/src/custom_kernels /usr/src/custom_kernels
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39/custom_kernels /usr/src/custom-kernels/src/custom_kernels
 
 # Install transformers dependencies
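The net effect in the Dockerfile is that the `pip install ninja` step and the redundant copy of the kernel source tree are dropped; only the compiled extension is copied out of the custom-kernels-builder stage. A minimal runtime sanity check for the copied artifact (a hypothetical helper, not part of this commit) could look like:

# Hypothetical sanity check, not part of this commit: verify that the
# compiled custom_kernels package copied from the builder stage can be
# found by the runtime Python.
import importlib.util

def custom_kernels_available() -> bool:
    # find_spec returns None when the package is not importable, which
    # would point at a failed build stage or a wrong COPY destination.
    return importlib.util.find_spec("custom_kernels") is not None

if __name__ == "__main__":
    print("custom_kernels importable:", custom_kernels_available())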

server/text_generation_server/models/__init__.py

@@ -54,7 +54,6 @@ __all__ = [
     "BLOOMSharded",
     "CausalLM",
     "FlashCausalLM",
-    "Galactica",
     "GalacticaSharded",
     "Seq2SeqLM",
     "SantaCoder",

server/text_generation_server/models/custom_modeling/flash_llama_modeling.py

@@ -301,14 +301,9 @@ class FlashLlamaModel(torch.nn.Module):
         self.layers = nn.ModuleList(
             [
                 FlashLlamaLayer(
-                    # config.num_attention_heads,
-                    # config.hidden_act,
-                    # config.hidden_size,
-                    # config.intermediate_size,
                     layer_id,
                     config,
                     weights,
-                    # config.rms_norm_eps,
                 )
                 for layer_id in range(config.num_hidden_layers)
             ]
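The deleted lines were commented-out remnants of an older FlashLlamaLayer signature that took each hyperparameter separately; the layer is now constructed from (layer_id, config, weights) and reads those values off config itself. An illustrative sketch of that pattern (not the file's actual implementation):

# Illustrative sketch, not the actual FlashLlamaLayer: the layer pulls its
# hyperparameters from the config object instead of positional arguments.
from torch import nn

class FlashLlamaLayer(nn.Module):
    def __init__(self, layer_id, config, weights):
        super().__init__()
        self.num_heads = config.num_attention_heads
        self.hidden_size = config.hidden_size
        self.intermediate_size = config.intermediate_size
        self.rms_norm_eps = config.rms_norm_eps
        # `weights` would supply the per-layer tensors, keyed by layer_id.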