mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 00:12:08 +00:00
* feat: add ruff and resolve issue * fix: update client exports and adjust after rebase * fix: adjust syntax to avoid circular import * fix: adjust client ruff settings * fix: lint and refactor import check and avoid model enum as global names * fix: improve fbgemm_gpu check and lints * fix: update lints * fix: prefer comparing model enum over str * fix: adjust lints and ignore specific rules * fix: avoid unneeded quantize check
18 lines
478 B
Python
18 lines
478 B
Python
# Re-export selected names from the `text_generation_server.layers.marlin`
# submodules. Every name imported here is also listed in `__all__` below, which
# marks these imports as deliberate re-exports rather than unused imports.
from text_generation_server.layers.marlin.fp8 import GPTQMarlinFP8Linear
from text_generation_server.layers.marlin.gptq import (
    GPTQMarlinLinear,
    GPTQMarlinWeight,
    can_use_gptq_marlin,
    repack_gptq_for_marlin,
)
from text_generation_server.layers.marlin.marlin import MarlinWeightsLoader

# Names exposed by `from <this module> import *`; keep in sync with the
# imports above.
__all__ = [
    "GPTQMarlinFP8Linear",
    "GPTQMarlinLinear",
    "GPTQMarlinWeight",
    "MarlinWeightsLoader",
    "can_use_gptq_marlin",
    "repack_gptq_for_marlin",
]