Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-28 13:32:10 +00:00
fix: add adapter_data param and avoid missing layers
commit b1169273fd
parent 91f407226d
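The first three hunks below make the same one-line change to three model classes (FlashRW, FlashSantacoder, FlashStarcoder2): each forward() signature gains an optional adapter_data tensor, so one shared caller can hand LoRA adapter metadata to every architecture through a uniform interface. What follows is a minimal, self-contained sketch of that pattern, not the TGI code itself; the class name, layer choices, and shapes are hypothetical.

from typing import Optional

import torch


class SketchForCausalLM(torch.nn.Module):
    """Hypothetical stand-in for the FlashRW/Santacoder/Starcoder2 classes."""

    def __init__(self, hidden_size: int = 16, vocab_size: int = 32):
        super().__init__()
        self.transformer = torch.nn.Embedding(vocab_size, hidden_size)
        self.lm_head = torch.nn.Linear(hidden_size, vocab_size)

    def forward(
        self,
        input_ids: torch.Tensor,
        lm_head_indices: Optional[torch.Tensor] = None,
        adapter_data: Optional[torch.Tensor] = None,  # the newly added parameter
    ) -> torch.Tensor:
        # Defaulting to None keeps every pre-existing call site valid;
        # only callers that actually have adapter data need to pass it.
        hidden_states = self.transformer(input_ids)
        if lm_head_indices is not None:
            hidden_states = hidden_states[lm_head_indices]
        return self.lm_head(hidden_states)

Because the parameter defaults to None, models that ignore adapter_data keep working unchanged while the dispatcher gains one signature for all architectures.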
@@ -672,6 +672,7 @@ class FlashRWForCausalLM(FlashRWPreTrainedModel):
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
+        adapter_data: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         hidden_states = self.transformer(
             input_ids,
@@ -488,6 +488,7 @@ class FlashSantacoderForCausalLM(nn.Module):
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
+        adapter_data: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         hidden_states = self.transformer(
             input_ids,
@@ -525,6 +525,7 @@ class FlashStarcoder2ForCausalLM(torch.nn.Module):
         max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
+        adapter_data: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         true_max_s = max_s
         if prefill_cache_indices is not None:
@@ -147,6 +147,9 @@ class BaseFlashMistral(FlashCausalLM):
                 layer.self_attn.o_proj,
             )
 
-            layer_weights[(i, "gate_proj")] = (
-                f"{prefix}.{i}.mlp.gate_proj",
-                layer.mlp.gate_up_proj,
+            # TODO: this is a hack to avoid the gate_proj for
+            # FlashStarcoder2 that doesnt have these layers
+            if hasattr(layer.mlp, "gate_up_proj"):
+                layer_weights[(i, "gate_proj")] = (
+                    f"{prefix}.{i}.mlp.gate_proj",
+                    layer.mlp.gate_up_proj,
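The BaseFlashMistral hunk is the "avoid missing layers" half of the fix: a gate_proj entry is registered in layer_weights only when the MLP actually exposes gate_up_proj, since (per the commit's own comment) FlashStarcoder2's MLP lacks that attribute and would otherwise raise an AttributeError. A hedged sketch of the guard, with made-up MLP classes standing in for the real modules:

import torch


class GatedMLP(torch.nn.Module):
    """Toy MLP with a fused gate/up projection, like Mistral-style models."""

    def __init__(self):
        super().__init__()
        self.gate_up_proj = torch.nn.Linear(4, 8)


class PlainMLP(torch.nn.Module):
    """Toy MLP without gate_up_proj, like the Starcoder2 case in the diff."""

    def __init__(self):
        super().__init__()
        self.c_fc = torch.nn.Linear(4, 8)


def collect_gate_proj(mlps, prefix="model.layers"):
    layer_weights = {}
    for i, mlp in enumerate(mlps):
        # Probe before registering: architectures missing the layer are skipped
        # instead of crashing the weight-collection pass.
        if hasattr(mlp, "gate_up_proj"):
            layer_weights[(i, "gate_proj")] = (
                f"{prefix}.{i}.mlp.gate_proj",
                mlp.gate_up_proj,
            )
    return layer_weights


# Only layer 0 is registered; the PlainMLP at index 1 is silently skipped.
print(collect_gate_proj([GatedMLP(), PlainMLP()]).keys())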
@@ -634,6 +634,7 @@ class IdeficsCausalLM(Model):
             tokenizer.add_special_tokens({"pad_token": "<unk>"})
 
         super(IdeficsCausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
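The final hunk forwards model_id up to the base Model constructor, keeping IdeficsCausalLM in step with the base class's updated signature (presumably so the base class can associate adapters with a concrete model id; that motivation is an assumption, not stated in the diff). A toy sketch of the same forwarding, with hypothetical class names:

class Model:
    def __init__(self, model_id: str, requires_padding: bool):
        self.model_id = model_id
        self.requires_padding = requires_padding


class IdeficsLike(Model):
    def __init__(self, model_id: str):
        super().__init__(
            model_id=model_id,  # newly forwarded, mirroring the hunk above
            requires_padding=True,
        )


print(IdeficsLike("org/idefics-like").model_id)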