use GPT2TokenizerFast by default

OlivierDehaene 2024-03-29 18:46:28 +01:00
parent dcfefc425a
commit 275a61aae6

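The diff below wraps tokenizer loading in a try/except: GPT2TokenizerFast is attempted first and, if that fails, loading falls back to AutoTokenizer. The following is a minimal standalone sketch of that fallback pattern, not the commit's exact code; the helper name and the fallback call's arguments (assumed to mirror the primary call) are illustrative assumptions.

    # Sketch of the fallback pattern introduced by this commit.
    from transformers import AutoTokenizer, GPT2TokenizerFast

    def load_tokenizer(model_id, revision=None, trust_remote_code=False):
        try:
            # Default path added by this commit: the fast GPT-2 tokenizer.
            return GPT2TokenizerFast.from_pretrained(
                model_id,
                revision=revision,
                padding_side="left",
                truncation_side="left",
                trust_remote_code=trust_remote_code,
                use_fast=True,
                from_slow=False,
            )
        except Exception:
            # Fallback: let AutoTokenizer resolve whatever tokenizer the
            # repository declares (arguments assumed to mirror the call above).
            return AutoTokenizer.from_pretrained(
                model_id,
                revision=revision,
                padding_side="left",
                truncation_side="left",
                trust_remote_code=trust_remote_code,
            )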

@@ -37,6 +37,17 @@ class FlashDbrx(FlashCausalLM):
        else:
            raise NotImplementedError("FlashDBRX is only available on GPU")
        try:
            tokenizer = GPT2TokenizerFast.from_pretrained(
                model_id,
                revision=revision,
                padding_side="left",
                truncation_side="left",
                trust_remote_code=trust_remote_code,
                use_fast=True,
                from_slow=False,
            )
        except:
            try:
                tokenizer = AutoTokenizer.from_pretrained(
                    model_id,