text-generation-inference/server/text_generation/models/__init__.py

from text_generation.models.model import Model
from text_generation.models.bloom import BLOOMSharded

# Names exported by a star-import of this package.
# NOTE(review): get_model (defined below) is not listed here, so
# `from ... import *` will not pick it up -- confirm this is intended.
__all__ = ["Model", "BLOOMSharded"]


def get_model(model_name: str, sharded: bool, quantize: bool) -> Model:
    """Build the model implementation matching the requested options.

    Names starting with ``"bigscience/bloom"`` are treated as BLOOM models;
    every other name is handled by the generic ``Model`` class.

    Args:
        model_name: Model identifier; only its prefix is inspected here.
        sharded: Whether a sharded implementation is requested.
        quantize: Whether quantization is requested.

    Returns:
        ``BLOOMSharded(model_name, quantize)`` for a sharded BLOOM model,
        otherwise ``Model(model_name)``.

    Raises:
        ValueError: If ``sharded`` or ``quantize`` is requested for a
            non-BLOOM model, or ``quantize`` is requested for a
            non-sharded BLOOM model.
    """
    is_bloom = model_name.startswith("bigscience/bloom")

    if not is_bloom:
        # Generic models support neither option; sharding is checked first,
        # so it wins when both flags are set.
        if sharded:
            raise ValueError("sharded is only supported for BLOOM models")
        if quantize:
            raise ValueError("Quantization is only supported for BLOOM models")
        return Model(model_name)

    if sharded:
        return BLOOMSharded(model_name, quantize)

    # Non-sharded BLOOM goes through the generic class, which takes no
    # quantize argument.
    if quantize:
        raise ValueError("quantization is not supported for non-sharded BLOOM")
    return Model(model_name)
feat(server): Support all AutoModelForCausalLM on a best effort basis 2022-10-28 17:24:00 +00:00			`from text_generation.models.model import Model`
			`from text_generation.models.bloom import BLOOMSharded`

			`__all__ = ["Model", "BLOOMSharded"]`


			`def get_model(model_name: str, sharded: bool, quantize: bool) -> Model:`

			`if model_name.startswith("bigscience/bloom"):`
			`if sharded:`
			`return BLOOMSharded(model_name, quantize)`
			`else:`
			`if quantize:`
			`raise ValueError("quantization is not supported for non-sharded BLOOM")`
			`return Model(model_name)`
			`else:`
			`if sharded:`
feat: Use json formatter by default in docker image 2022-11-02 16:29:56 +00:00			`raise ValueError("sharded is only supported for BLOOM models")`
feat(server): Support all AutoModelForCausalLM on a best effort basis 2022-10-28 17:24:00 +00:00			`if quantize:`
			`raise ValueError("Quantization is only supported for BLOOM models")`

			`return Model(model_name)`