Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-05-03 07:52:06 +00:00.
Tiny fixes for falcon.

commit 55cf4d257c (parent e5e552b496)
@@ -21,7 +21,8 @@ from text_generation_server.utils.layers import (
 
 
 def load_row(config, prefix: str, weights, bias: bool):
-    weight = weights.get_sharded(f"{prefix}.weight", dim=1)
+    weight = weights.get_multi_weights_col([prefix], quantize=config.quantize, dim=1)
+
     if bias and weights.process_group.rank() == 0:
         # Rank is only on the first rank process
         bias = weights.get_tensor(f"{prefix}.bias")
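This hunk routes falcon's row-parallel linear loading through the quantize-aware accessor instead of fetching f"{prefix}.weight" directly. That matters because GPTQ checkpoints do not contain a plain .weight tensor; the weight is stored as packed quantized tensors, so the loader has to branch on config.quantize before touching tensor names. Below is a minimal, self-contained sketch of that idea; ToyWeights and its internals are invented for illustration and are not TGI's actual Weights class.

import torch

class ToyWeights:
    """Toy stand-in for TGI's Weights accessor (internals invented here)."""

    def __init__(self, tensors: dict):
        self.tensors = tensors

    def get_sharded(self, name: str, dim: int) -> torch.Tensor:
        # The real method slices the tensor across ranks along `dim`;
        # this toy version returns it whole.
        return self.tensors[name]

    def get_multi_weights_col(self, prefixes, quantize, dim):
        if quantize == "gptq":
            # GPTQ checkpoints store packed tensors, not f"{prefix}.weight".
            return [
                (self.get_sharded(f"{p}.qweight", dim),
                 self.get_sharded(f"{p}.scales", dim))
                for p in prefixes
            ]
        # Unquantized path: concatenate the per-prefix shards.
        return torch.cat(
            [self.get_sharded(f"{p}.weight", dim) for p in prefixes], dim=dim
        )

fp16 = ToyWeights({"dense.weight": torch.randn(8, 8)})
print(fp16.get_multi_weights_col(["dense"], quantize=None, dim=1).shape)  # torch.Size([8, 8])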
@@ -205,7 +205,7 @@ class GPTQ:
 
     def print_loss(self, name, q_weight, weight_error, timecost):
         table = Texttable()
-        length = 30
+        length = 28
         name = (
             (name + " " * (length - len(name)))
             if len(name) <= length
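The GPTQ hunk is cosmetic: it narrows the fixed-width name column in the Texttable debug output from 30 to 28 characters. A standalone sketch of the padding expression from print_loss follows; since the hunk cuts off before the else branch, the truncation shown here is an assumption.

length = 28  # column width, reduced from 30 by this commit

def pad_name(name: str) -> str:
    # Same padding expression as in print_loss; the long-name branch is assumed.
    return (
        (name + " " * (length - len(name)))
        if len(name) <= length
        else name[:length]  # assumed truncation for names longer than `length`
    )

print(len(pad_name("mlp.dense_h_to_4h")))  # 28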
@@ -1165,10 +1165,12 @@ def quantize(
         f"split in {len(shards)} checkpoint shards. You can find where each parameters has been saved in the "
         f"index located at {save_index_file}."
     )
-    config = AutoConfig.from_pretrained(model_id)
+    config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
     config.save_pretrained(output_dir)
     logger.info("Saved config")
     logger.info("Saving tokenizer")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_id, trust_remote_code=trust_remote_code
+    )
     tokenizer.save_pretrained(output_dir)
     logger.info("Saved tokenizer")
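The quantize() hunk threads the function's existing trust_remote_code argument through to transformers instead of dropping it. That is what makes this a falcon fix: at the time of this commit, falcon's modeling and configuration code shipped inside the Hub repo rather than in transformers itself, so loading its config or tokenizer without the flag fails. A minimal usage sketch, with an illustrative model id:

from transformers import AutoConfig, AutoTokenizer

model_id = "tiiuae/falcon-7b"  # illustrative; any custom-code repo behaves the same

# Without trust_remote_code=True, transformers refuses to execute the
# custom modeling code bundled in the repo (falcon predated native support).
config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)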