Fixing exl2 and other quantize tests again.

This commit is contained in:
Nicolas Patry 2024-08-14 16:30:46 +02:00
parent 9aaa12e7ac
commit f4ce670eb0
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
4 changed files with 5 additions and 2 deletions

View File

@ -1,7 +1,7 @@
exllamav2_commit := v0.1.8
exllamav2_commit := 872386c89eaebe0bde5b245a890f1da9522768b3
build-exllamav2:
git clone https://github.com/turboderp/exllamav2.git exllamav2 && \
git clone https://github.com/Narsil/exllamav2.git exllamav2 && \
cd exllamav2 && git fetch && git checkout $(exllamav2_commit) && \
git submodule update --init --recursive && \
pip install -r requirements.txt && \

View File

@ -652,6 +652,7 @@ class CausalLM(Model):
dtype=dtype,
device=device,
)
self.quantize = quantize
return self
@property

View File

@ -412,6 +412,7 @@ class Mamba(Model):
dtype: Optional[torch.dtype] = None,
trust_remote_code: bool = False,
):
self.quantize = quantize
self.process_group, _rank, world_size = initialize_torch_distributed()
if world_size > 1:
raise RuntimeError("Mamba does not support Tensor Parallelism (TP)")

View File

@ -676,6 +676,7 @@ class Seq2SeqLM(Model):
dtype=dtype,
device=device,
)
self.quantize = quantize
return self
@property