From 933060cc3faacbf76626bc31bed2ee6d60cd9c0f Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 30 Sep 2024 11:53:21 +0200
Subject: [PATCH] Updating model link.

---
 docs/source/supported_models.md                  | 1 +
 server/text_generation_server/models/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md
index 3fa78ee9..be280a2b 100644
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -35,6 +35,7 @@ Text Generation Inference enables serving optimized models on specific hardware
 - [Gpt Neox](https://huggingface.co/EleutherAI/gpt-neox-20b)
 - [Gptj](https://huggingface.co/EleutherAI/gpt-j-6b)
 - [Idefics](https://huggingface.co/HuggingFaceM4/idefics-9b) (Multimodal)
+- [Mllama](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) (Multimodal)
 
 If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:
 
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index babd851d..085cea5f 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -324,7 +324,7 @@ class ModelType(enum.Enum):
     MLLAMA = {
         "type": "mllama",
         "name": "Mllama",
-        "url": "https://huggingface.co/xxx/xx",
+        "url": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct",
         "multimodal": True,
     }