mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 14:52:20 +00:00
Merge fd88b1d6b9
into 8f8819795f
This commit is contained in:
commit
d73e2184ef
@ -89,7 +89,12 @@ impl LlavaNext {
|
|||||||
pub fn get_number_of_features(&self, height: usize, width: usize) -> usize {
|
pub fn get_number_of_features(&self, height: usize, width: usize) -> usize {
|
||||||
let image_size = self.vision_config.image_size;
|
let image_size = self.vision_config.image_size;
|
||||||
let patch_size = self.vision_config.patch_size;
|
let patch_size = self.vision_config.patch_size;
|
||||||
assert!(image_size % patch_size == 0);
|
if image_size % patch_size != 0 {
|
||||||
|
warn!(
|
||||||
|
"Image size {} is not divisible by patch size {}, will round down",
|
||||||
|
image_size, patch_size
|
||||||
|
);
|
||||||
|
}
|
||||||
let npatches = image_size / patch_size;
|
let npatches = image_size / patch_size;
|
||||||
// Dimensions are intentionally swapped to be bug-compatible with
|
// Dimensions are intentionally swapped to be bug-compatible with
|
||||||
// upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59
|
// upstream: https://github.com/LLaVA-VL/LLaVA-NeXT/issues/59
|
||||||
@ -461,4 +466,26 @@ mod test {
|
|||||||
let slots = config.get_number_of_features(1067, 1600);
|
let slots = config.get_number_of_features(1067, 1600);
|
||||||
assert_eq!(slots, 2144);
|
assert_eq!(slots, 2144);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_uneven_division() {
|
||||||
|
let config = LlavaNext {
|
||||||
|
text_config: TextConfig {},
|
||||||
|
vision_config: VisionConfig {
|
||||||
|
image_size: 337, // Intentionally uneven
|
||||||
|
patch_size: 14,
|
||||||
|
},
|
||||||
|
image_grid_pinpoints: vec![
|
||||||
|
(336, 672),
|
||||||
|
(672, 336),
|
||||||
|
(672, 672),
|
||||||
|
(1008, 336),
|
||||||
|
(336, 1008),
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Should still work even with uneven division
|
||||||
|
let slots = config.get_number_of_features(640, 640);
|
||||||
|
assert_eq!(slots, 2928);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -225,7 +225,10 @@ def get_number_of_features(height: int, width: int, config) -> int:
|
|||||||
image_size = config.vision_config.image_size
|
image_size = config.vision_config.image_size
|
||||||
patch_size = config.vision_config.patch_size
|
patch_size = config.vision_config.patch_size
|
||||||
|
|
||||||
assert image_size % patch_size == 0
|
if image_size % patch_size != 0:
|
||||||
|
logger.warning(
|
||||||
|
f"Image size {image_size} is not divisible by patch size {patch_size}"
|
||||||
|
)
|
||||||
|
|
||||||
npatches = image_size // patch_size
|
npatches = image_size // patch_size
|
||||||
|
|
||||||
@ -579,9 +582,9 @@ class VlmCausalLM(FlashCausalLM):
|
|||||||
cuda_graph["input_lengths"].zero_()
|
cuda_graph["input_lengths"].zero_()
|
||||||
cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
|
cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
|
||||||
cuda_graph["cache_lengths"].zero_()
|
cuda_graph["cache_lengths"].zero_()
|
||||||
cuda_graph["cache_lengths"][
|
cuda_graph["cache_lengths"][: cache_lengths_tensor.shape[0]] = (
|
||||||
: cache_lengths_tensor.shape[0]
|
cache_lengths_tensor
|
||||||
] = cache_lengths_tensor
|
)
|
||||||
|
|
||||||
with self._forward_context(
|
with self._forward_context(
|
||||||
block_tables=cuda_graph["block_tables"],
|
block_tables=cuda_graph["block_tables"],
|
||||||
|
Loading…
Reference in New Issue
Block a user