Tiny fixes for falcon.

This commit is contained in:
Nicolas Patry 2023-06-14 09:29:44 +02:00
parent e5e552b496
commit 55cf4d257c
2 changed files with 7 additions and 4 deletions

View File

@ -21,7 +21,8 @@ from text_generation_server.utils.layers import (
def load_row(config, prefix: str, weights, bias: bool):
weight = weights.get_sharded(f"{prefix}.weight", dim=1)
weight = weights.get_multi_weights_col([prefix], quantize=config.quantize, dim=1)
if bias and weights.process_group.rank() == 0:
# Bias is only loaded on the first-rank process (rank 0)
bias = weights.get_tensor(f"{prefix}.bias")

View File

@ -205,7 +205,7 @@ class GPTQ:
def print_loss(self, name, q_weight, weight_error, timecost):
table = Texttable()
length = 30
length = 28
name = (
(name + " " * (length - len(name)))
if len(name) <= length
@ -1165,10 +1165,12 @@ def quantize(
f"split in {len(shards)} checkpoint shards. You can find where each parameters has been saved in the "
f"index located at {save_index_file}."
)
config = AutoConfig.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
config.save_pretrained(output_dir)
logger.info("Saved config")
logger.info("Saving tokenizer")
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(
model_id, trust_remote_code=trust_remote_code
)
tokenizer.save_pretrained(output_dir)
logger.info("Saved tokenizer")