diff --git a/Dockerfile b/Dockerfile
index ae53b748..056f2f2b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -105,7 +105,6 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN pip install ninja
 RUN python setup.py build
 
 # Text Generation Inference base image
@@ -137,7 +136,6 @@ COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy build artifacts from transformers builder
-COPY --from=custom-kernels-builder /usr/src/custom_kernels /usr/src/custom_kernels
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39/custom_kernels /usr/src/custom-kernels/src/custom_kernels
 
 # Install transformers dependencies
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index aa3eca33..19b0ce63 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -54,7 +54,6 @@ __all__ = [
     "BLOOMSharded",
     "CausalLM",
     "FlashCausalLM",
-    "Galactica",
     "GalacticaSharded",
     "Seq2SeqLM",
     "SantaCoder",
diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index f27bd0d5..9b3353e9 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -301,14 +301,9 @@ class FlashLlamaModel(torch.nn.Module):
         self.layers = nn.ModuleList(
             [
                 FlashLlamaLayer(
-                    # config.num_attention_heads,
-                    # config.hidden_act,
-                    # config.hidden_size,
-                    # config.intermediate_size,
                     layer_id,
                     config,
                     weights,
-                    # config.rms_norm_eps,
                 )
                 for layer_id in range(config.num_hidden_layers)
             ]