mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-12 04:44:52 +00:00
Fixing exl2 and other quantize tests again.
This commit is contained in:
parent
9aaa12e7ac
commit
f4ce670eb0
@ -1,7 +1,7 @@
|
||||
exllamav2_commit := v0.1.8
|
||||
exllamav2_commit := 872386c89eaebe0bde5b245a890f1da9522768b3
|
||||
|
||||
build-exllamav2:
|
||||
git clone https://github.com/turboderp/exllamav2.git exllamav2 && \
|
||||
git clone https://github.com/Narsil/exllamav2.git exllamav2 && \
|
||||
cd exllamav2 && git fetch && git checkout $(exllamav2_commit) && \
|
||||
git submodule update --init --recursive && \
|
||||
pip install -r requirements.txt && \
|
||||
|
@ -652,6 +652,7 @@ class CausalLM(Model):
|
||||
dtype=dtype,
|
||||
device=device,
|
||||
)
|
||||
self.quantize = quantize
|
||||
return self
|
||||
|
||||
@property
|
||||
|
@ -412,6 +412,7 @@ class Mamba(Model):
|
||||
dtype: Optional[torch.dtype] = None,
|
||||
trust_remote_code: bool = False,
|
||||
):
|
||||
self.quantize = quantize
|
||||
self.process_group, _rank, world_size = initialize_torch_distributed()
|
||||
if world_size > 1:
|
||||
raise RuntimeError("Mamba does not support Tensor Parallelism (TP)")
|
||||
|
@ -676,6 +676,7 @@ class Seq2SeqLM(Model):
|
||||
dtype=dtype,
|
||||
device=device,
|
||||
)
|
||||
self.quantize = quantize
|
||||
return self
|
||||
|
||||
@property
|
||||
|
Loading…
Reference in New Issue
Block a user