mirror of https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 03:44:54 +00:00

minor fix

This commit is contained in:
parent 5cfd4b168a
commit d1cf64abc4
@@ -39,7 +39,7 @@ httpcore==1.0.7
     # via httpx
 httpx==0.28.1
     # via openai
-huggingface-hub==0.30.1
+huggingface-hub==0.29.3
     # via
     #   text-generation-integration-tests (pyproject.toml)
     #   text-generation
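The hunk above pins huggingface-hub back to 0.29.3 in the integration-test requirements. As a minimal sketch (not part of the commit), a test harness could fail fast if the installed package has drifted from the pin; importlib.metadata is the assumed lookup mechanism:

# Hypothetical guard, not from the repo: check the environment against
# the lockfile pin this commit introduces before running the suite.
from importlib.metadata import version

PINNED = "0.29.3"
installed = version("huggingface-hub")
assert installed == PINNED, f"expected huggingface-hub=={PINNED}, got {installed}"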
@@ -661,7 +661,7 @@ class BloomModel(BloomPreTrainedModel):
 
         return combined_attention_mask
 
-    def set_inputs_embeds(self, new_embeddings: torch.Tensor):
+    def set_input_embeddings(self, new_embeddings: torch.Tensor):
         self.word_embeddings = new_embeddings
 
     def forward(
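The rename fixes a typo'd method name: the transformers convention is set_input_embeddings, paired with get_input_embeddings, and PreTrainedModel utilities such as resize_token_embeddings resolve a model's embedding table through that pair. A minimal sketch of the convention, with an illustrative stand-in class (the constructor and getter here are assumptions, not the repo's code; only the setter body matches the diff):

import torch
from torch import nn

class BloomModelSketch(nn.Module):
    """Illustrative stand-in for BloomModel's embedding accessors."""

    def __init__(self, vocab_size: int = 32, hidden_size: int = 8):
        super().__init__()
        self.word_embeddings = nn.Embedding(vocab_size, hidden_size)

    def get_input_embeddings(self) -> nn.Embedding:
        # Conventional getter paired with the setter renamed in this commit.
        return self.word_embeddings

    def set_input_embeddings(self, new_embeddings: torch.Tensor):
        # Same body as the diff: swap in the new embedding table.
        self.word_embeddings = new_embeddings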
@@ -959,7 +959,6 @@ class MllamaForConditionalGeneration(nn.Module):
         # XXX: Putting these as optional so that the cuda warmup calls can go through.
         cross_attention_states: Optional[torch.Tensor] = None,
         image_indices=None,
-        inputs_embeds=None,
     ):
         if cross_attention_states is not None:
             seqlen_q = len(image_indices)
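This hunk drops inputs_embeds from the signature (the -959,7 +959,6 line counts confirm a one-line deletion); the parameter was accepted but never read in the body shown. A hedged sketch of the caller-side effect, with names assumed for illustration: a stray inputs_embeds keyword now raises TypeError instead of being silently discarded.

from typing import Optional

import torch

def forward_sketch(
    cross_attention_states: Optional[torch.Tensor] = None,
    image_indices=None,
):
    # Mirrors the post-commit signature: no inputs_embeds parameter.
    if cross_attention_states is not None:
        return len(image_indices)  # seqlen_q in the original
    return None

forward_sketch(cross_attention_states=torch.zeros(1, 4), image_indices=[0])  # -> 1
# forward_sketch(inputs_embeds=torch.zeros(1, 4))  # would now raise TypeError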