fix: adjust llava logic and bump snaps

This commit is contained in:
drbh 2025-06-06 14:54:10 +00:00
parent 30bdf922bd
commit 2204f91f32
3 changed files with 59 additions and 56 deletions

View File

@ -9,61 +9,61 @@
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -0.007621765, "logprob": -0.052612305,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.20812988, "logprob": -0.079589844,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 16114, "id": 16114,
"logprob": -1.2587891, "logprob": -1.6865234,
"special": false, "special": false,
"text": "Once" "text": "Once"
}, },
{ {
"id": 3714, "id": 3714,
"logprob": -0.20825195, "logprob": -0.20983887,
"special": false, "special": false,
"text": " upon" "text": " upon"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.0017709732, "logprob": -0.0014019012,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 727, "id": 727,
"logprob": -0.011932373, "logprob": -0.0121154785,
"special": false, "special": false,
"text": " time" "text": " time"
}, },
{ {
"id": 28725, "id": 28725,
"logprob": -0.17297363, "logprob": -0.15405273,
"special": false, "special": false,
"text": "," "text": ","
}, },
{ {
"id": 736, "id": 736,
"logprob": -0.9057617, "logprob": -0.4802246,
"special": false, "special": false,
"text": " there" "text": " there"
}, },
{ {
"id": 403, "id": 403,
"logprob": -0.05758667, "logprob": -0.03289795,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.00970459, "logprob": -0.01423645,
"special": false, "special": false,
"text": " a" "text": " a"
} }
@ -82,61 +82,61 @@
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -0.007621765, "logprob": -0.052612305,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.20275879, "logprob": -0.07946777,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 16114, "id": 16114,
"logprob": -1.2578125, "logprob": -1.6914062,
"special": false, "special": false,
"text": "Once" "text": "Once"
}, },
{ {
"id": 3714, "id": 3714,
"logprob": -0.2084961, "logprob": -0.21020508,
"special": false, "special": false,
"text": " upon" "text": " upon"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.0017738342, "logprob": -0.0014238358,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 727, "id": 727,
"logprob": -0.011932373, "logprob": -0.012138367,
"special": false, "special": false,
"text": " time" "text": " time"
}, },
{ {
"id": 28725, "id": 28725,
"logprob": -0.17297363, "logprob": -0.15625,
"special": false, "special": false,
"text": "," "text": ","
}, },
{ {
"id": 736, "id": 736,
"logprob": -0.9057617, "logprob": -0.47827148,
"special": false, "special": false,
"text": " there" "text": " there"
}, },
{ {
"id": 403, "id": 403,
"logprob": -0.05758667, "logprob": -0.03289795,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.00970459, "logprob": -0.01423645,
"special": false, "special": false,
"text": " a" "text": " a"
} }
@ -155,61 +155,61 @@
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -0.007621765, "logprob": -0.052246094,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.20275879, "logprob": -0.07739258,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 16114, "id": 16114,
"logprob": -1.2578125, "logprob": -1.6875,
"special": false, "special": false,
"text": "Once" "text": "Once"
}, },
{ {
"id": 3714, "id": 3714,
"logprob": -0.2084961, "logprob": -0.20922852,
"special": false, "special": false,
"text": " upon" "text": " upon"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.0017738342, "logprob": -0.0014228821,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 727, "id": 727,
"logprob": -0.011932373, "logprob": -0.012130737,
"special": false, "special": false,
"text": " time" "text": " time"
}, },
{ {
"id": 28725, "id": 28725,
"logprob": -0.17297363, "logprob": -0.15612793,
"special": false, "special": false,
"text": "," "text": ","
}, },
{ {
"id": 736, "id": 736,
"logprob": -0.9057617, "logprob": -0.47827148,
"special": false, "special": false,
"text": " there" "text": " there"
}, },
{ {
"id": 403, "id": 403,
"logprob": -0.05758667, "logprob": -0.032928467,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.00970459, "logprob": -0.014144897,
"special": false, "special": false,
"text": " a" "text": " a"
} }
@ -228,61 +228,61 @@
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -0.007621765, "logprob": -0.052978516,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.20812988, "logprob": -0.080444336,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 16114, "id": 16114,
"logprob": -1.2587891, "logprob": -1.6826172,
"special": false, "special": false,
"text": "Once" "text": "Once"
}, },
{ {
"id": 3714, "id": 3714,
"logprob": -0.20825195, "logprob": -0.21044922,
"special": false, "special": false,
"text": " upon" "text": " upon"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.0017709732, "logprob": -0.0014238358,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 727, "id": 727,
"logprob": -0.011932373, "logprob": -0.012107849,
"special": false, "special": false,
"text": " time" "text": " time"
}, },
{ {
"id": 28725, "id": 28725,
"logprob": -0.17297363, "logprob": -0.15405273,
"special": false, "special": false,
"text": "," "text": ","
}, },
{ {
"id": 736, "id": 736,
"logprob": -0.9057617, "logprob": -0.47875977,
"special": false, "special": false,
"text": " there" "text": " there"
}, },
{ {
"id": 403, "id": 403,
"logprob": -0.05758667, "logprob": -0.03289795,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.00970459, "logprob": -0.01423645,
"special": false, "special": false,
"text": " a" "text": " a"
} }

View File

@ -8,61 +8,61 @@
"tokens": [ "tokens": [
{ {
"id": 13, "id": 13,
"logprob": -0.00756073, "logprob": -0.052612305,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 13, "id": 13,
"logprob": -0.20117188, "logprob": -0.07739258,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 16114, "id": 16114,
"logprob": -1.2597656, "logprob": -1.6914062,
"special": false, "special": false,
"text": "Once" "text": "Once"
}, },
{ {
"id": 3714, "id": 3714,
"logprob": -0.20825195, "logprob": -0.21020508,
"special": false, "special": false,
"text": " upon" "text": " upon"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.00178051, "logprob": -0.0014228821,
"special": false, "special": false,
"text": " a" "text": " a"
}, },
{ {
"id": 727, "id": 727,
"logprob": -0.011955261, "logprob": -0.012123108,
"special": false, "special": false,
"text": " time" "text": " time"
}, },
{ {
"id": 28725, "id": 28725,
"logprob": -0.17541504, "logprob": -0.15625,
"special": false, "special": false,
"text": "," "text": ","
}, },
{ {
"id": 736, "id": 736,
"logprob": -0.91308594, "logprob": -0.47875977,
"special": false, "special": false,
"text": " there" "text": " there"
}, },
{ {
"id": 403, "id": 403,
"logprob": -0.058410645, "logprob": -0.033416748,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 264, "id": 264,
"logprob": -0.009689331, "logprob": -0.014137268,
"special": false, "special": false,
"text": " a" "text": " a"
} }

View File

@ -133,6 +133,8 @@ class LlavaNextForConditionalGeneration(nn.Module):
# and we select the hidden states at those layers # and we select the hidden states at those layers
vision_feature_layer = config.vision_feature_layer vision_feature_layer = config.vision_feature_layer
else:
vision_feature_layer = [vision_config.num_hidden_layers - 1]
self.vision_feature_layer = vision_feature_layer self.vision_feature_layer = vision_feature_layer
@ -209,12 +211,13 @@ class LlavaNextForConditionalGeneration(nn.Module):
f"Strategy `{self.config.vision_feature_select_strategy}` is not supported/valid." f"Strategy `{self.config.vision_feature_select_strategy}` is not supported/valid."
) )
# vision_feature_layer is a list of layer indices, we select the hidden states at those layers if image_features.hidden_states is not None:
hs_pool = [ # vision_feature_layer is a list of layer indices, we select the hidden states at those layers
image_features.hidden_states[layer_idx] hs_pool = [
for layer_idx in self.vision_feature_layer image_features.hidden_states[layer_idx]
] for layer_idx in self.vision_feature_layer
selected_image_feature = torch.cat(hs_pool, dim=-1) ]
selected_image_feature = torch.cat(hs_pool, dim=-1)
image_features = self.multi_modal_projector(selected_image_feature) image_features = self.multi_modal_projector(selected_image_feature)