From ecb0db45afde6de703a82e8c9c73586ca50bb55d Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Fri, 15 Dec 2023 14:56:17 +0100
Subject: [PATCH] fix: fix logic if sliding window key is not present in config (#1352)

---
 .../models/custom_modeling/flash_mistral_modeling.py | 2 +-
 .../models/custom_modeling/flash_mixtral_modeling.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index c85624f3..0fc4e1b3 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -60,7 +60,7 @@ class MistralConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        sliding_window=4096,
+        sliding_window=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
diff --git a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
index b468d09b..61488ec4 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -72,7 +72,7 @@ class MixtralConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        sliding_window=4096,
+        sliding_window=None,
         num_experts_per_tok=2,
         num_local_experts=8,
         **kwargs,
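
Context on the change: code that consumes these configs typically treats sliding_window as an optional feature, enabling windowed attention only when the value is an integer. With the old default of 4096, a checkpoint whose config.json omits the "sliding_window" key would silently get windowed attention; defaulting to None keeps "key absent" meaning "feature off". A minimal sketch of that None-vs-int pattern follows; the attention_window helper is hypothetical and not the actual TGI implementation:

    from typing import Optional

    class MistralConfig:
        # Trimmed illustration of the patched default; the real class
        # inherits from transformers.PretrainedConfig.
        def __init__(self, sliding_window: Optional[int] = None, **kwargs):
            self.sliding_window = sliding_window

    def attention_window(config: MistralConfig) -> Optional[int]:
        # Hypothetical helper: enable windowed attention only when the
        # config actually carries an integer window size.
        if config.sliding_window is not None and config.sliding_window > 0:
            return config.sliding_window  # attend to the last N tokens only
        return None  # fall back to full self-attention

    assert attention_window(MistralConfig()) is None                 # key absent -> off
    assert attention_window(MistralConfig(sliding_window=4096)) == 4096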