fix: read vocab size from tokenizer and add hacky patch for qwen2b

drbh 2025-05-05 23:39:24 +00:00
parent 55d82d4654
commit b32cd97b71
5 changed files with 20 additions and 7 deletions

View File

@@ -13,7 +13,7 @@
       "usage": null
     }
   ],
-  "created": 1745337456,
+  "created": 1746486174,
   "id": "",
   "model": "Qwen/Qwen2-VL-2B-Instruct",
   "object": "chat.completion",

View File

@@ -13,7 +13,7 @@
       "usage": null
     }
   ],
-  "created": 1745337878,
+  "created": 1746486174,
   "id": "",
   "model": "Qwen/Qwen2-VL-2B-Instruct",
   "object": "chat.completion",

View File

@@ -11,7 +11,7 @@
       "logprobs": null
     }
   ],
-  "created": 1745337495,
+  "created": 1746486174,
   "id": "",
   "model": "Qwen/Qwen2-VL-2B-Instruct",
   "object": "chat.completion.chunk",

View File

@@ -1267,6 +1267,15 @@ class FlashCausalLM(Model):
         prefix = None
         model = model_class(prefix, config, weights)
+
+        if model.config.vocab_size != tokenizer.vocab_size:
+            logger.warning(
+                f"Tokenizer vocab size {tokenizer.vocab_size} does not match model vocab size {model.config.vocab_size}. Updating tokenizer vocab size."
+            )
+            # TODO: HUGE HACK! This is a workaround for the fact that Qwen2TokenizerFast
+            # returns the incorrect vocab size for the 2B model.
+            tokenizer._vocab_size = model.config.vocab_size
+
         torch.distributed.barrier(group=self.process_group)

         # VLM models define the config we care about in their text_config
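
For reference, the same workaround as a minimal standalone sketch, assuming a config/tokenizer pair loaded with transformers (the vocab counts in the comments are illustrative for Qwen2-style checkpoints, not values taken from this diff):

from transformers import AutoConfig, AutoTokenizer

model_id = "Qwen/Qwen2-VL-2B-Instruct"
config = AutoConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Qwen2TokenizerFast reports the base vocabulary (around 151643 entries),
# while the checkpoint's embedding matrix is padded larger (config.vocab_size),
# so the two counts disagree for this model.
if config.vocab_size != tokenizer.vocab_size:
    # Same hack as the commit above: stash the model-side size on a private
    # attribute that the logit-bias processors below can prefer via getattr().
    tokenizer._vocab_size = config.vocab_size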

View File

@@ -641,7 +641,8 @@ class LogitBiasProcessor(LogitsProcessor):
     ):
         assert logit_biases, "LogitBiasProcessor requires non-empty logit_biases"

-        vocab_size = len(tokenizer)
+        # use _vocab_size or fallback to tokenizer.vocab_size if not available
+        self.vocab_size = getattr(tokenizer, "_vocab_size", tokenizer.vocab_size)

         # Convert keys to integers and values to a list
         token_ids = torch.tensor(
@@ -650,7 +651,7 @@ class LogitBiasProcessor(LogitsProcessor):
         bias_values = torch.tensor(list(logit_biases.values()), dtype=torch.float)

         # Create a tensor and directly copy bias values at the corresponding indices
-        self.bias_tensor = torch.zeros(vocab_size, dtype=torch.float)
+        self.bias_tensor = torch.zeros(self.vocab_size, dtype=torch.float)
         self.bias_tensor.index_put_((token_ids,), bias_values, accumulate=True)

     def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor) -> torch.Tensor:
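
Why the bias tensor has to be sized from the model side: the logits have config.vocab_size columns, so a tensor built from the smaller tokenizer-reported count cannot be added to them. A small shape check (the sizes here are illustrative, not taken from this diff):

import torch

scores = torch.zeros(1, 151936)   # logits padded to the model's vocab size
bad_bias = torch.zeros(151643)    # tokenizer-reported vocab size
good_bias = torch.zeros(151936)   # size taken from _vocab_size after the patch

scores + good_bias                # broadcasts cleanly over the batch dimension
try:
    scores + bad_bias             # shape mismatch at the vocab dimension
except RuntimeError as e:
    print(e)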
@@ -669,10 +670,13 @@ class HeterogeneousLogitBiasProcessor(LogitsProcessor):
         tokenizer: PreTrainedTokenizerBase,
         device: torch.device,
     ):
+        assert logit_biases, "LogitBiasProcessor requires non-empty logit_biases"
         self.tokenizer = tokenizer
         self.logit_biases = logit_biases

-        self.vocab_size = len(tokenizer)
+        # import ipdb; ipdb.set_trace()
+        # use _vocab_size or fallback to tokenizer.vocab_size if not available
+        self.vocab_size = getattr(tokenizer, "_vocab_size", tokenizer.vocab_size)

         # Create batch_size x vocab_size bias matrix
         self.bias_matrix = torch.zeros(
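
How the getattr fallback resolves in both cases, with a hypothetical ToyTokenizer standing in for PreTrainedTokenizerBase:

class ToyTokenizer:
    vocab_size = 151643  # what the fast tokenizer reports (illustrative)

plain = ToyTokenizer()
patched = ToyTokenizer()
patched._vocab_size = 151936  # value stashed by the flash_causal_lm patch above

# An unpatched tokenizer falls back to its public vocab_size attribute...
assert getattr(plain, "_vocab_size", plain.vocab_size) == 151643
# ...while a patched one yields the model-side size, so the bias matrix
# matches the logits' last dimension.
assert getattr(patched, "_vocab_size", patched.vocab_size) == 151936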