fix: run lint and update docs

2025-09-11 20:34:54 +00:00 · 2024-09-02 19:29:27 +00:00 · 2024-09-02 19:29:27 +00:00 · 1c917c0349
commit 1c917c0349
parent 1fb9d406e7
3 changed files with 3 additions and 4 deletions
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -2183,4 +2183,4 @@
      "description": "Hugging Face Text Generation Inference API"
    }
  ]
-}
+}
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -20,6 +20,7 @@ Text Generation Inference enables serving optimized models on specific hardware
 - [Mixtral](https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1)
 - [Gpt Bigcode](https://huggingface.co/bigcode/gpt_bigcode-santacoder)
 - [Phi](https://huggingface.co/microsoft/phi-1_5)
+- [PhiMoe](https://huggingface.co/microsoft/Phi-3.5-MoE-instruct)
 - [Baichuan](https://huggingface.co/baichuan-inc/Baichuan2-7B-Chat)
 - [Falcon](https://huggingface.co/tiiuae/falcon-7b-instruct)
 - [StarCoder 2](https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1)
--- a/integration-tests/models/test_flash_phi35_moe.py
+++ b/integration-tests/models/test_flash_phi35_moe.py
@@ -4,9 +4,7 @@ import pytest
 @pytest.fixture(scope="module")
 def flash_llama_chat_handle(launcher):
    with launcher(
-        "microsoft/Phi-3.5-MoE-instruct", 
+        "microsoft/Phi-3.5-MoE-instruct", num_shard=4, cuda_graphs=[1, 2]
-        num_shard=4,
-        cuda_graphs=[1, 2]
    ) as handle:
        yield handle