From 4a02d3505f3b8a14c71e50e344a68113c18b1981 Mon Sep 17 00:00:00 2001
From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com>
Date: Thu, 4 Apr 2024 18:48:58 +0200
Subject: [PATCH] add contiguous

---
 .../models/custom_modeling/flash_cohere_modeling.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
index 9208a595..6376788b 100644
--- a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
@@ -222,8 +222,8 @@ class FlashCohereAttention(torch.nn.Module):
             dim=1,
         )
         if self.use_qk_norm:
-            query = self.q_norm(query)
-            key = self.k_norm(key)
+            query = self.q_norm(query.contiguous())
+            key = self.k_norm(key.contiguous())

         query = query.view(-1, self.num_heads, self.head_size)
         key = key.view(-1, self.num_key_value_heads, self.head_size)
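
Background sketch (not part of the patch): splitting the fused QKV projection along dim=1 returns strided views rather than dense tensors, so `query` and `key` arrive at `q_norm`/`k_norm` non-contiguous; the added `.contiguous()` calls presumably materialize dense copies before the norm kernel runs. The minimal, self-contained illustration below uses made-up shapes and plain torch.split, not the model's own code:

    import torch

    # Stand-in for a fused QKV projection output: 4 tokens, three
    # 64-wide chunks packed along dim=1 (sizes are illustrative only).
    qkv = torch.randn(4, 3 * 64)
    query, key, value = qkv.split([64, 64, 64], dim=1)

    print(query.is_contiguous())               # False: split returns strided views
    print(query.contiguous().is_contiguous())  # True: .contiguous() makes a dense copy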