mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
Merge fd88b1d6b9
into 8f8819795f
This commit is contained in:
commit
d73e2184ef
@ -89,7 +89,12 @@ impl LlavaNext {
|
||||
pub fn get_number_of_features(&self, height: usize, width: usize) -> usize {
|
||||
let image_size = self.vision_config.image_size;
|
||||
let patch_size = self.vision_config.patch_size;
|
||||
assert!(image_size % patch_size == 0);
|
||||
if image_size % patch_size != 0 {
|
||||
warn!(
|
||||
"Image size {} is not divisible by patch size {}, will round down",
|
||||
image_size, patch_size
|
||||
);
|
||||
}
|
||||
let npatches = image_size / patch_size;
|
||||
// Dimensions are intentionally swapped to be bug-compatible with
|
||||
// upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59
|
||||
@ -461,4 +466,26 @@ mod test {
|
||||
let slots = config.get_number_of_features(1067, 1600);
|
||||
assert_eq!(slots, 2144);
|
||||
}
|
||||
|
||||
// Regression test: `get_number_of_features` must still produce a feature
// count when `image_size` (337) is not an exact multiple of `patch_size` (14).
#[test]
|
||||
fn test_uneven_division() {
|
||||
let config = LlavaNext {
|
||||
text_config: TextConfig {},
|
||||
vision_config: VisionConfig {
|
||||
image_size: 337, // Intentionally uneven
|
||||
patch_size: 14,
|
||||
},
|
||||
image_grid_pinpoints: vec![
|
||||
(336, 672),
|
||||
(672, 336),
|
||||
(672, 672),
|
||||
(1008, 336),
|
||||
(336, 1008),
|
||||
],
|
||||
};
|
||||
|
||||
// Should still work even with uneven division
|
||||
let slots = config.get_number_of_features(640, 640);
|
||||
// NOTE(review): 337 / 14 truncates to 24 in integer arithmetic, the same
// patch count as a 336-pixel image, so 2928 is presumably the value an
// evenly divisible 336/14 config yields for 640x640 -- confirm against the
// full function body.
assert_eq!(slots, 2928);
|
||||
}
|
||||
}
|
||||
|
@ -225,7 +225,10 @@ def get_number_of_features(height: int, width: int, config) -> int:
|
||||
image_size = config.vision_config.image_size
|
||||
patch_size = config.vision_config.patch_size
|
||||
|
||||
assert image_size % patch_size == 0
|
||||
if image_size % patch_size != 0:
|
||||
logger.warning(
|
||||
f"Image size {image_size} is not divisible by patch size {patch_size}"
|
||||
)
|
||||
|
||||
npatches = image_size // patch_size
|
||||
|
||||
@ -579,9 +582,9 @@ class VlmCausalLM(FlashCausalLM):
|
||||
cuda_graph["input_lengths"].zero_()
|
||||
cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
|
||||
cuda_graph["cache_lengths"].zero_()
|
||||
cuda_graph["cache_lengths"][
|
||||
: cache_lengths_tensor.shape[0]
|
||||
] = cache_lengths_tensor
|
||||
cuda_graph["cache_lengths"][: cache_lengths_tensor.shape[0]] = (
|
||||
cache_lengths_tensor
|
||||
)
|
||||
|
||||
with self._forward_context(
|
||||
block_tables=cuda_graph["block_tables"],
|
||||
|
Loading…
Reference in New Issue
Block a user