mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-20 22:32:07 +00:00
* feat: add ruff and resolve issue * fix: update client exports and adjust after rebase * fix: adjust syntax to avoid circular import * fix: adjust client ruff settings * fix: lint and refactor import check and avoid model enum as global names * fix: improve fbgemm_gpu check and lints * fix: update lints * fix: prefer comparing model enum over str * fix: adjust lints and ignore specific rules * fix: avoid unneeded quantize check
35 lines
919 B
Python
35 lines
919 B
Python
from text_generation_server.layers.tensor_parallel import (
|
|
TensorParallelColumnLinear,
|
|
TensorParallelRowLinear,
|
|
TensorParallelEmbedding,
|
|
)
|
|
from text_generation_server.layers.linear import (
|
|
get_linear,
|
|
FastLinear,
|
|
)
|
|
from text_generation_server.layers.speculative import SpeculativeHead
|
|
|
|
# The next two imports are here just to add the `load` methods.
|
|
from text_generation_server.layers.layernorm import load_layer_norm
|
|
from text_generation_server.layers.conv import load_conv2d
|
|
|
|
from text_generation_server.layers.lora import (
|
|
LoraLinear,
|
|
TensorParallelMultiAdapterLinear,
|
|
TensorParallelAdapterRowLinear,
|
|
)
|
|
|
|
# Public names re-exported by this package. Every entry corresponds to one of
# the imports above; listing them here marks those imports as intentional
# re-exports and defines what `from <package> import *` provides.
__all__ = [
    # from text_generation_server.layers.linear
    "get_linear",
    "FastLinear",
    # from text_generation_server.layers.tensor_parallel
    "TensorParallelColumnLinear",
    "TensorParallelRowLinear",
    "TensorParallelEmbedding",
    # from text_generation_server.layers.speculative
    "SpeculativeHead",
    # from text_generation_server.layers.lora
    "LoraLinear",
    "TensorParallelMultiAdapterLinear",
    "TensorParallelAdapterRowLinear",
    # from text_generation_server.layers.layernorm and .conv
    "load_layer_norm",
    "load_conv2d",
]
|