Pad token handling for Llama3.1 (#199)

This commit is contained in:
Sun Choi 2024-08-12 15:00:41 -07:00 committed by GitHub
parent c09f5bc930
commit e3f0f85b70
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -674,7 +674,14 @@ class CausalLM(Model):
 if model.config.pad_token_id is not None:
     tokenizer.pad_token_id = model.config.pad_token_id
 elif model.config.eos_token_id is not None:
-    tokenizer.pad_token_id = model.config.eos_token_id
+    if isinstance(model.config.eos_token_id, int):
+        tokenizer.pad_token_id = model.config.eos_token_id
+    elif isinstance(model.config.eos_token_id, list):
+        tokenizer.pad_token_id = model.config.eos_token_id[0]
+    else:
+        raise ValueError(
+            f"{type(model.config.eos_token_id)} type of eos_token_id in the model's config is not supported for tokenizer.pad_token_id"
+        )
 elif tokenizer.eos_token_id is not None:
     tokenizer.pad_token_id = tokenizer.eos_token_id
 else: