mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00
Apply suggestions from code review
Co-authored-by: OlivierDehaene <olivier@huggingface.co>
This commit is contained in:
parent
c6ac50e42b
commit
6ddcd1582c
@ -105,7 +105,6 @@ WORKDIR /usr/src
|
||||
COPY server/custom_kernels/ .
|
||||
|
||||
# Build the custom kernels
|
||||
RUN pip install ninja
|
||||
RUN python setup.py build
|
||||
|
||||
# Text Generation Inference base image
|
||||
@ -137,7 +136,6 @@ COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib
|
||||
COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
|
||||
|
||||
# Copy build artifacts from custom kernels builder
|
||||
COPY --from=custom-kernels-builder /usr/src/custom_kernels /usr/src/custom_kernels
|
||||
COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39/custom_kernels /usr/src/custom-kernels/src/custom_kernels
|
||||
|
||||
# Install transformers dependencies
|
||||
|
@ -54,7 +54,6 @@ __all__ = [
|
||||
"BLOOMSharded",
|
||||
"CausalLM",
|
||||
"FlashCausalLM",
|
||||
"Galactica",
|
||||
"GalacticaSharded",
|
||||
"Seq2SeqLM",
|
||||
"SantaCoder",
|
||||
|
@ -301,14 +301,9 @@ class FlashLlamaModel(torch.nn.Module):
|
||||
self.layers = nn.ModuleList(
|
||||
[
|
||||
FlashLlamaLayer(
|
||||
# config.num_attention_heads,
|
||||
# config.hidden_act,
|
||||
# config.hidden_size,
|
||||
# config.intermediate_size,
|
||||
layer_id,
|
||||
config,
|
||||
weights,
|
||||
# config.rms_norm_eps,
|
||||
)
|
||||
for layer_id in range(config.num_hidden_layers)
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user