mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-05-02 23:42:06 +00:00
Tiny fixes for falcon.
This commit is contained in:
parent
e5e552b496
commit
55cf4d257c
@ -21,7 +21,8 @@ from text_generation_server.utils.layers import (
|
||||
|
||||
|
||||
def load_row(config, prefix: str, weights, bias: bool):
|
||||
weight = weights.get_sharded(f"{prefix}.weight", dim=1)
|
||||
weight = weights.get_multi_weights_col([prefix], quantize=config.quantize, dim=1)
|
||||
|
||||
if bias and weights.process_group.rank() == 0:
|
||||
# Bias is only loaded on the first rank process
|
||||
bias = weights.get_tensor(f"{prefix}.bias")
|
||||
|
@ -205,7 +205,7 @@ class GPTQ:
|
||||
|
||||
def print_loss(self, name, q_weight, weight_error, timecost):
|
||||
table = Texttable()
|
||||
length = 30
|
||||
length = 28
|
||||
name = (
|
||||
(name + " " * (length - len(name)))
|
||||
if len(name) <= length
|
||||
@ -1165,10 +1165,12 @@ def quantize(
|
||||
f"split in {len(shards)} checkpoint shards. You can find where each parameters has been saved in the "
|
||||
f"index located at {save_index_file}."
|
||||
)
|
||||
config = AutoConfig.from_pretrained(model_id)
|
||||
config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
|
||||
config.save_pretrained(output_dir)
|
||||
logger.info("Saved config")
|
||||
logger.info("Saving tokenizer")
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
model_id, trust_remote_code=trust_remote_code
|
||||
)
|
||||
tokenizer.save_pretrained(output_dir)
|
||||
logger.info("Saved tokenizer")
|
||||
|
Loading…
Reference in New Issue
Block a user