Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 03:44:54 +00:00)
commit 6ddcd1582c (parent c6ac50e42b)

Apply suggestions from code review

Co-authored-by: OlivierDehaene <olivier@huggingface.co>
@@ -105,7 +105,6 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 
 # Build specific version of transformers
-RUN pip install ninja
 RUN python setup.py build
 
 # Text Generation Inference base image
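For readers following this Dockerfile hunk: the explicit `RUN pip install ninja` in front of the kernel build is dropped. Custom CUDA kernel packages like this one are typically built with PyTorch's C++ extension helpers, whose BuildExtension compiles with ninja when it is available and falls back to the stock distutils backend otherwise. A minimal, hypothetical sketch of such a setup.py (illustrative names, not the repo's actual file):

# Hypothetical sketch of a custom-kernel setup.py; the extension and
# source names are illustrative, not copied from the repository.
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name="custom_kernels",
    ext_modules=[
        CUDAExtension(
            name="custom_kernels.fused_attention_cuda",  # assumed name
            sources=["custom_kernels/fused_attention_cuda.cu"],
        )
    ],
    # BuildExtension uses ninja when it is installed and falls back
    # to distutils otherwise, so a hard `pip install ninja` step is
    # not strictly required for the build to succeed.
    cmdclass={"build_ext": BuildExtension},
)

Running `python setup.py build` on such a package leaves the compiled package under `build/lib.linux-x86_64-cpython-39/custom_kernels`, which is the path the next hunk copies out of the builder stage.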
@@ -137,7 +136,6 @@ COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-39 /opt/conda/lib/python3.9/site-packages
 
 # Copy build artifacts from transformers builder
-COPY --from=custom-kernels-builder /usr/src/custom_kernels /usr/src/custom_kernels
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-39/custom_kernels /usr/src/custom-kernels/src/custom_kernels
 
 # Install transformers dependencies
@@ -54,7 +54,6 @@ __all__ = [
     "BLOOMSharded",
     "CausalLM",
     "FlashCausalLM",
-    "Galactica",
     "GalacticaSharded",
     "Seq2SeqLM",
     "SantaCoder",
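On the Python side, the hunk above (apparently the models package's `__init__.py`) drops `"Galactica"` from `__all__`, so only the sharded variant stays publicly exported. As a generic reminder of what that controls (a sketch, not the repo's actual file):

# Generic sketch of __all__ semantics, reusing names from the diff.
__all__ = ["GalacticaSharded"]

class GalacticaSharded: ...
class Galactica: ...

# `from models import *` now brings in GalacticaSharded but not
# Galactica; an explicit `from models import Galactica` still works,
# since __all__ only restricts star-imports.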
@@ -301,14 +301,9 @@ class FlashLlamaModel(torch.nn.Module):
         self.layers = nn.ModuleList(
             [
                 FlashLlamaLayer(
-                    # config.num_attention_heads,
-                    # config.hidden_act,
-                    # config.hidden_size,
-                    # config.intermediate_size,
                     layer_id,
                     config,
                     weights,
-                    # config.rms_norm_eps,
                 )
                 for layer_id in range(config.num_hidden_layers)
             ]
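The last hunk deletes constructor arguments that were already commented out: the layer reads its hyperparameters from `config` and its tensors from `weights`, so passing the per-field values was dead code. A hypothetical sketch of the signature implied by the call site (the real `FlashLlamaLayer` lives in the repo and is not reproduced here):

from torch import nn

class FlashLlamaLayer(nn.Module):
    # Hypothetical signature inferred from the call site above.
    def __init__(self, layer_id, config, weights):
        super().__init__()
        self.layer_id = layer_id
        # Values such as num_attention_heads, hidden_size,
        # intermediate_size and rms_norm_eps come straight from
        # `config`, and the layer's parameters are loaded from
        # `weights`, which is why the individually passed
        # (commented-out) arguments were safe to delete.
        self.num_heads = config.num_attention_heads
        self.hidden_size = config.hidden_size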