Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-21 14:52:20 +00:00.
The `GPTQWeightsLoader` was structured like this in pseudocode: if marlin: Set up tensors in a way that GPTQ-Marlin expects else: Set up tensors in a way that ExLlama/GPTQ/AWQ expect. However, the GPTQ-Marlin implementation details should really be in the `marlin` module. So move the former part out to a separate `GPTQMarlinWeightsLoader`.
16 lines · 446 B · Python
"""Public API of the Marlin quantization layers.

Re-exports the Marlin FP8 linear layer, the GPTQ-Marlin weights loader and
its helpers, and the plain Marlin weights loader, so callers can import them
from `text_generation_server.layers.marlin` directly.
"""

from text_generation_server.layers.marlin.fp8 import GPTQMarlinFP8Linear
from text_generation_server.layers.marlin.gptq import (
    GPTQMarlinWeightsLoader,
    can_use_gptq_marlin,
    repack_gptq_for_marlin,
)
from text_generation_server.layers.marlin.marlin import MarlinWeightsLoader

# Explicit public API: these are the only names exported via `import *`.
__all__ = [
    "GPTQMarlinFP8Linear",
    "GPTQMarlinWeightsLoader",
    "MarlinWeightsLoader",
    "can_use_gptq_marlin",
    "repack_gptq_for_marlin",
]