diff --git a/integration-tests/models/test_compressed_tensors_w8a8_int.py b/integration-tests/models/test_compressed_tensors_w8a8_int.py index 84fee287..ca7829c0 100644 --- a/integration-tests/models/test_compressed_tensors_w8a8_int.py +++ b/integration-tests/models/test_compressed_tensors_w8a8_int.py @@ -37,7 +37,9 @@ async def test_compressed_tensors_w8a8_int( assert response == response_snapshot +@pytest.mark.release @pytest.mark.asyncio +@pytest.mark.private async def test_compressed_tensors_w8a8_int_all_params( compressed_tensors_w8a8_int, response_snapshot ): diff --git a/integration-tests/models/test_compressed_tensors_w8a8_int_dynamic_weight.py b/integration-tests/models/test_compressed_tensors_w8a8_int_dynamic_weight.py index 50779218..7cc82a4e 100644 --- a/integration-tests/models/test_compressed_tensors_w8a8_int_dynamic_weight.py +++ b/integration-tests/models/test_compressed_tensors_w8a8_int_dynamic_weight.py @@ -39,7 +39,9 @@ async def test_compressed_tensors_w8a8_int_dynamic_weight( assert response == response_snapshot +@pytest.mark.release @pytest.mark.asyncio +@pytest.mark.private async def test_compressed_tensors_w8a8_int_dynamic_weight_all_params( compressed_tensors_w8a8_int_dynamic_weight, response_snapshot ): diff --git a/server/text_generation_server/layers/compressed_tensors/w8a8_int.py b/server/text_generation_server/layers/compressed_tensors/w8a8_int.py index 4bfc8c53..fc6d81e4 100644 --- a/server/text_generation_server/layers/compressed_tensors/w8a8_int.py +++ b/server/text_generation_server/layers/compressed_tensors/w8a8_int.py @@ -51,10 +51,10 @@ class W8A8IntLoader(WeightsLoader): def scale_to_str(scale): return "static" if scale else "dynamic" - def symmetric_to_string(symmetric): + def symmetric_to_str(symmetric): return "symmetric" if symmetric else "asymmetric" - return f"{self.__class__.__name__} (w8a8 int, input: dynamic/{symmetric_to_string(self.input_symmetric)}, weight: {scale_to_str(self.load_weight_scale)}/symmetric))" + return f"{self.__class__.__name__} (w8a8 int, input: dynamic/{symmetric_to_str(self.input_symmetric)}, weight: {scale_to_str(self.load_weight_scale)}/symmetric))" def get_weights(self, weights: "Weights", prefix: str): w = weights.get_tensor(f"{prefix}.weight", to_dtype=False)