diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md
index 3fa78ee9..be280a2b 100644
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -35,6 +35,7 @@ Text Generation Inference enables serving optimized models on specific hardware
 - [Gpt Neox](https://huggingface.co/EleutherAI/gpt-neox-20b)
 - [Gptj](https://huggingface.co/EleutherAI/gpt-j-6b)
 - [Idefics](https://huggingface.co/HuggingFaceM4/idefics-9b) (Multimodal)
+- [Mllama](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) (Multimodal)
 
 If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:
 
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index babd851d..085cea5f 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -324,7 +324,7 @@ class ModelType(enum.Enum):
     MLLAMA = {
         "type": "mllama",
         "name": "Mllama",
-        "url": "https://huggingface.co/xxx/xx",
+        "url": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct",
         "multimodal": True,
     }
 
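For context on why both files change together: each `ModelType` member carries a dict with a display `name`, a canonical `url`, and an optional `multimodal` flag, which is exactly the information in the bullet added to `supported_models.md`. Below is a minimal sketch of how such a bullet could be rendered from the enum entry; the `doc_line` helper is hypothetical, and the assumption that the docs list is generated this way is not confirmed by the diff itself.

```python
import enum


class ModelType(enum.Enum):
    # Trimmed to the two multimodal entries relevant to this diff; the real
    # enum in server/text_generation_server/models/__init__.py has many more.
    IDEFICS = {
        "type": "idefics",
        "name": "Idefics",
        "url": "https://huggingface.co/HuggingFaceM4/idefics-9b",
        "multimodal": True,
    }
    MLLAMA = {
        "type": "mllama",
        "name": "Mllama",
        "url": "https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct",
        "multimodal": True,
    }


def doc_line(model: ModelType) -> str:
    """Hypothetical helper: render one supported-models bullet from an enum member."""
    suffix = " (Multimodal)" if model.value.get("multimodal") else ""
    return f"- [{model.value['name']}]({model.value['url']}){suffix}"


for model in ModelType:
    print(doc_line(model))
# - [Idefics](https://huggingface.co/HuggingFaceM4/idefics-9b) (Multimodal)
# - [Mllama](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) (Multimodal)
```

This sketch also shows why replacing the `https://huggingface.co/xxx/xx` placeholder matters: the URL stored in the enum value is the one surfaced to users, so leaving the placeholder would expose a dead link wherever that entry is rendered.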