diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index 78372195..bcaf6ec1 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -46,6 +46,7 @@ __all__ = [ if FLASH_ATTENTION: __all__.append(FlashNeoX) __all__.append(FlashNeoXSharded) + __all__.append(FlashSantacoder) # The flag below controls whether to allow TF32 on matmul. This flag defaults to False # in PyTorch 1.12 and later.