From 891e19cc5126a53375a1908312da4e7cedd763e0 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 7 Aug 2023 12:28:19 +0200 Subject: [PATCH 1/2] Fix dynamic rope. (#783) # What does this PR do? Fixes a typo in the dynamic RoPE base computation: the expression used `seq_len` where the surrounding code uses `seqlen`. Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a GitHub issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. 
--- server/text_generation_server/utils/layers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index 97257f95..c2a340b2 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -543,7 +543,7 @@ try: or self._cos_cached.dtype != dtype ): if seqlen > self.max_position_embeddings: - newbase = self.base * ((self.scaling_factor * seq_len / self.max_position_embeddings) - (self.scaling_factor - 1)) ** (self.dim / (self.dim - 2)) + newbase = self.base * ((self.scaling_factor * seqlen / self.max_position_embeddings) - (self.scaling_factor - 1)) ** (self.dim / (self.dim - 2)) self.inv_freq = _create_inv_freq(self.dim, newbase, self.inv_freq.device) self._seq_len_cached = seqlen t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype) From 1fdc88ee908beb8ae0afe17810a17b9b4d8848e2 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 7 Aug 2023 13:02:00 +0200 Subject: [PATCH 2/2] Fixing non 4bits quantization. (#785) # What does this PR do? Fixes # (issue) Fixes #784 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#start-contributing-pull-requests), Pull Request section? - [ ] Was this discussed/approved via a Github issue or the [forum](https://discuss.huggingface.co/)? Please add a link to it if that's the case. - [ ] Did you make sure to update the documentation with your changes? Here are the [documentation guidelines](https://github.com/huggingface/transformers/tree/main/docs), and [here are tips on formatting docstrings](https://github.com/huggingface/transformers/tree/main/docs#writing-source-documentation). - [ ] Did you write any new necessary tests? ## Who can review? 
Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. --- server/text_generation_server/utils/gptq/quant_linear.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/utils/gptq/quant_linear.py b/server/text_generation_server/utils/gptq/quant_linear.py index 54fa2014..bfc91c00 100644 --- a/server/text_generation_server/utils/gptq/quant_linear.py +++ b/server/text_generation_server/utils/gptq/quant_linear.py @@ -263,7 +263,7 @@ class QuantLinear(nn.Module): self.groupsize = groupsize self.outfeatures = qweight.shape[1] - self.infeatures = qweight.shape[0] * 32 // 4 + self.infeatures = qweight.shape[0] * 32 // bits @classmethod def new(cls, bits, groupsize, infeatures, outfeatures, bias):