Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-09-10 20:04:52 +00:00
bump torch to more recent version
This commit is contained in:

parent 87db820627
commit 3d50ff71b7

.github/workflows/build.yaml (vendored): 2 changed lines
@@ -251,7 +251,7 @@ jobs:
       - name: Run tests
         run: |
           export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HF_TOKEN }}
+          export HF_TOKEN=${{ secrets.HF_TOKEN }}

           export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
           echo "DOCKER_IMAGE:"
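This hunk renames the exported variable from the legacy HUGGING_FACE_HUB_TOKEN to HF_TOKEN, the name that current huggingface_hub releases read by default. As a rough sketch (not part of the commit, and assuming the integration tests authenticate through huggingface_hub), the renamed variable can be checked like this:

import os

from huggingface_hub import whoami

# huggingface_hub falls back to the HF_TOKEN environment variable when no
# token is passed explicitly; HUGGING_FACE_HUB_TOKEN is the older alias.
assert os.environ.get("HF_TOKEN"), "HF_TOKEN is not set"
print(whoami()["name"])  # raises if the token is missing or invalid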
@@ -96,7 +96,10 @@ RUN pip uninstall -y triton && \
     cd triton/python && \
     pip install .

-RUN git clone --depth 1 --recursive --single-branch --branch 2.3-patched https://github.com/fxmarty/pytorch.git pytorch && cd pytorch && pip install -r requirements.txt --no-cache-dir
+RUN git clone --depth 1 --recursive --single-branch --branch 2.3-patched https://github.com/fxmarty/pytorch.git pytorch && \
+    cd pytorch && \
+    git checkout adbb514d024d4ba1b6f9c126d321fd5e7597e966 && \
+    pip install -r requirements.txt --no-cache-dir

 ARG _GLIBCXX_USE_CXX11_ABI="1"
 ARG CMAKE_PREFIX_PATH="/opt/conda"
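This hunk (the file name is not preserved in the scrape, but the patched fxmarty/pytorch fork suggests the ROCm Dockerfile) pins the PyTorch source build to a specific commit instead of whatever the 2.3-patched branch head happens to be, which keeps the image build reproducible. A quick runtime check, shown here as an illustrative sketch rather than anything the commit adds, can confirm which build ended up in the image:

import torch

# torch.version.git_version records the git SHA the wheel was built from; for
# this image it should correspond to the pinned commit
# adbb514d024d4ba1b6f9c126d321fd5e7597e966.
print(torch.__version__)
print(torch.version.git_version)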
@@ -309,7 +309,9 @@ class LlamaMLP(nn.Module):
                 dtype=hidden_states.dtype,
                 device="cuda",
             )
-            _custom_C.LLMM_Silu(self.gate_up_proj.linear.weight, hidden_states, out, 8)
+            _custom_C.LLMM_Silu(
+                self.gate_up_proj.base_layer.linear.weight, hidden_states, out, 8
+            )
             return self.down_proj(out, adapter_data)
         else:
             gate_up_states = self.gate_up_proj(hidden_states, adapter_data)
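The LlamaMLP hunk fixes the fused ROCm path: gate_up_proj is a wrapped layer whose underlying dense projection sits under .base_layer (presumably an adapter/LoRA wrapper), so the custom LLMM_Silu kernel must read .base_layer.linear.weight rather than .linear.weight. Functionally, the kernel fuses the gate/up GEMM with SiLU gating, the same computation the unfused else branch performs. Below is a plain-PyTorch reference for that computation, written as a sketch under the assumption that the projection output is the gate and up halves concatenated along the last dimension; it is not the kernel itself.

import torch
import torch.nn.functional as F


def llmm_silu_reference(weight: torch.Tensor, hidden_states: torch.Tensor) -> torch.Tensor:
    # weight: [2 * intermediate_size, hidden_size] concatenated gate/up projection
    # hidden_states: [num_tokens, hidden_size]
    gate_up = hidden_states @ weight.t()  # the GEMM the kernel fuses
    gate, up = gate_up.chunk(2, dim=-1)   # split into gate and up halves
    return F.silu(gate) * up              # SiLU gating, as in the unfused branch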
@@ -314,7 +314,9 @@ class MistralMLP(nn.Module):
                 dtype=hidden_states.dtype,
                 device="cuda",
             )
-            _custom_C.LLMM_Silu(self.gate_up_proj.linear.weight, hidden_states, out, 8)
+            _custom_C.LLMM_Silu(
+                self.gate_up_proj.base_layer.linear.weight, hidden_states, out, 8
+            )
             return self.down_proj(out, adapter_data)
         else:
             gate_up_states = self.gate_up_proj(hidden_states, adapter_data)
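The MistralMLP hunk mirrors the LlamaMLP change above: the same kernel call, the same switch to .base_layer.linear.weight. A minimal mock of why the old attribute path breaks once the projection is wrapped (all class names below are invented for illustration and do not come from the repository):

from torch import nn


class FakeDenseProjection(nn.Module):
    """Stand-in for the unwrapped projection layer (name invented)."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = nn.Linear(16, 64, bias=False)


class FakeAdapterWrapper(nn.Module):
    """Stand-in for an adapter wrapper that keeps the dense layer under .base_layer."""

    def __init__(self, base_layer: nn.Module) -> None:
        super().__init__()
        self.base_layer = base_layer


gate_up_proj = FakeAdapterWrapper(FakeDenseProjection())
weight = gate_up_proj.base_layer.linear.weight  # access path used after this commit
# gate_up_proj.linear.weight                    # old path: raises AttributeError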