Mirror of https://github.com/huggingface/text-generation-inference.git
Fixing the CI.
This commit is contained in:
parent f2be5f3db4
commit 0b1d253c04
@@ -193,7 +193,8 @@ RUN cd server && \
     pwd && \
     text-generation-server --help
 
-RUN uv pip install torchvision --no-deps
+# This shouldn't be necessary.
+# RUN uv pip install torchvision --no-deps
 
 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
@@ -155,10 +155,7 @@ fn find_supported_resolutions(max_num_chunks: usize, height: usize) -> Vec<(usiz
             let divisor = gcd(h, w);
             let key = (h / divisor, w / divisor); // reduced aspect ratio as key
 
-            if !asp_dict.contains_key(&key) {
-                asp_dict.insert(key, vec![]);
-            }
-            asp_dict.get_mut(&key).unwrap().push((h, w));
+            asp_dict.entry(key).or_default().push((h, w));
         }
     }
 
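Note on the hunk above: the explicit contains_key / insert / get_mut sequence is replaced by the HashMap entry API, which does a single lookup and preserves behavior. A minimal standalone sketch (made-up keys and values, not taken from the repo) of why the two forms agree:

use std::collections::HashMap;

fn main() {
    // Hypothetical aspect-ratio map mirroring asp_dict in find_supported_resolutions.
    let mut asp_dict: HashMap<(usize, usize), Vec<(usize, usize)>> = HashMap::new();
    let key = (1, 2);

    // Old pattern: check, insert an empty Vec, then look the key up again.
    if !asp_dict.contains_key(&key) {
        asp_dict.insert(key, vec![]);
    }
    asp_dict.get_mut(&key).unwrap().push((224, 448));

    // New pattern: entry() inserts the default (empty Vec) only when the key is
    // missing and returns a mutable reference either way, so one lookup suffices.
    asp_dict.entry(key).or_default().push((336, 672));

    assert_eq!(asp_dict[&key], vec![(224, 448), (336, 672)]);
}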
@@ -176,7 +173,7 @@ fn find_supported_resolutions(max_num_chunks: usize, height: usize) -> Vec<(usiz
 fn get_best_fit(
     original_height: usize,
     original_width: usize,
-    possible_resolutions: &Vec<(usize, usize)>,
+    possible_resolutions: &[(usize, usize)],
     resize_to_max_canvas: bool,
 ) -> (usize, usize) {
     let orig_h = original_height as f32;
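Note on the signature change above: taking &[(usize, usize)] instead of &Vec<(usize, usize)> is the idiomatic choice (what clippy's ptr_arg lint suggests), since a borrowed Vec coerces to a slice and arrays or sub-slices can now be passed as well. A small sketch, with a hypothetical helper best_area that is not part of the repo:

// Hypothetical helper: picks the largest area among candidate resolutions.
fn best_area(possible_resolutions: &[(usize, usize)]) -> usize {
    possible_resolutions.iter().map(|&(h, w)| h * w).max().unwrap_or(0)
}

fn main() {
    let from_vec: Vec<(usize, usize)> = vec![(224, 224), (336, 672)];
    let from_array = [(448, 448)];

    assert_eq!(best_area(&from_vec), 336 * 672); // &Vec<_> derefs to &[_]
    assert_eq!(best_area(&from_array), 448 * 448); // arrays coerce to slices too
}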
@@ -194,20 +191,13 @@ fn get_best_fit(
     let upscaling_options: Vec<f32> = scales.iter().copied().filter(|&s| s >= 1.0).collect();
     let selected_scale = if !upscaling_options.is_empty() {
         if resize_to_max_canvas {
-            upscaling_options
-                .into_iter()
-                .fold(f32::MIN, f32::max)
+            upscaling_options.into_iter().fold(f32::MIN, f32::max)
         } else {
-            upscaling_options
-                .into_iter()
-                .fold(f32::MAX, f32::min)
+            upscaling_options.into_iter().fold(f32::MAX, f32::min)
         }
     } else {
-        let downscaling_options: Vec<f32> =
-            scales.iter().copied().filter(|&s| s < 1.0).collect();
-        downscaling_options
-            .into_iter()
-            .fold(f32::MIN, f32::max)
+        let downscaling_options: Vec<f32> = scales.iter().copied().filter(|&s| s < 1.0).collect();
+        downscaling_options.into_iter().fold(f32::MIN, f32::max)
     };
 
     let chosen_canvas: Vec<(usize, usize)> = possible_resolutions
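Note on the hunk above: this is purely a formatting reflow; the collapsed one-liners still compute the same scale. Folding with f32::max from an initial f32::MIN selects the maximum of the candidate scales, and folding with f32::min from f32::MAX selects the minimum. A standalone sketch with made-up scale values:

fn main() {
    let upscaling_options = vec![1.25_f32, 2.0, 1.5];

    // Same folds as in get_best_fit, just on example data.
    let largest = upscaling_options.iter().copied().fold(f32::MIN, f32::max);
    let smallest = upscaling_options.iter().copied().fold(f32::MAX, f32::min);

    assert_eq!(largest, 2.0);
    assert_eq!(smallest, 1.25);
}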
@@ -375,7 +365,6 @@ pub struct Gemma3 {
     vision_config: Gemma3VisionConfig,
 }
 
-
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(tag = "model_type")]
 #[serde(rename_all = "snake_case")]
@@ -207,7 +207,6 @@ pub struct Llama4Processor {
     do_image_splitting: bool,
 }
 
-
 #[derive(Debug, Clone, Deserialize, Default)]
 pub struct HubProcessorConfig {
     pub chat_template: Option<ChatTemplateVersions>,
@@ -698,7 +698,8 @@ fn image_tokens(
             let image_height = config.image_size();
             let patch_size = config.patch_size();
             let pixel_shuffle_ratio = config.pixel_shuffle_ratio();
-            let downsample_ratio = (1.0 / (pixel_shuffle_ratio * pixel_shuffle_ratio)).round() as usize;
+            let downsample_ratio =
+                (1.0 / (pixel_shuffle_ratio * pixel_shuffle_ratio)).round() as usize;
 
             let (ratio_h, ratio_w) = config.get_aspect_ratios(height, width);
             let image_width = image_height; // Assuming pixel shape: [H][W][C]
@@ -726,7 +727,7 @@ fn image_tokens(
             img_string.push_str(IMAGE_END);
 
             img_string
-        },
+        }
         Qwen2Vl(config) => format!(
             "<|vision_start|>{:?}<|vision_end|>",
             "<|image_pad|>".repeat(config.get_number_of_features(height, width))
@@ -770,8 +771,8 @@ fn prepare_input<T: TokenizerTrait>(
     static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"!\[\]\([^\)]*\)").unwrap());
     let (tokenizer_query, input_chunks) = match config {
         Some(
-            config @ (Idefics | Mllama | Idefics2(_) | Idefics3(_) | Gemma3(_) | Llama4(_) | Paligemma(_)
-            | LlavaNext(_) | Qwen2Vl(_) | Qwen2_5Vl(_)),
+            config @ (Idefics | Mllama | Idefics2(_) | Idefics3(_) | Gemma3(_) | Llama4(_)
+            | Paligemma(_) | LlavaNext(_) | Qwen2Vl(_) | Qwen2_5Vl(_)),
         ) => {
             let mut input_chunks = Vec::new();
             let mut tokenizer_query = String::with_capacity(inputs.len());
@@ -395,7 +395,7 @@ class TransformersFlashVlmCausalLM(VlmCausalLM):
             image_grid_thw=image_grid_thw,
             attention_mask=inputs.get("attention_mask", None),
             use_sdpa=inputs.get("use_sdpa", False),
-            cache_position=inputs.get("cache_position", None)
+            cache_position=inputs.get("cache_position", None),
         ).logits
 
         logits = self.post_process_outputs(logits, lm_head_indices)
@@ -560,9 +560,7 @@ class TransformersGemma3VlmCausalLM(TransformersFlashVlmCausalLM):
 
 class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM):
     def pre_process_inputs(self, input_ids, position_ids, cu_seqlen_prefill):
-        inputs = super().pre_process_inputs(
-            input_ids, position_ids, cu_seqlen_prefill
-        )
+        inputs = super().pre_process_inputs(input_ids, position_ids, cu_seqlen_prefill)
        inputs["cache_position"] = position_ids
         inputs["attention_mask"] = torch.zeros((1, 1, 1, 1), device=input_ids.device)
         return inputs