mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Upgrading the tests (TP>1 fix changes to use different kernels.)
This commit is contained in:
parent
5ca6da15f5
commit
8673bb050d
@ -11,57 +11,57 @@
|
||||
},
|
||||
{
|
||||
"id": 3226,
|
||||
"logprob": -8.9453125,
|
||||
"logprob": -9.0234375,
|
||||
"text": " ge"
|
||||
},
|
||||
{
|
||||
"id": 21017,
|
||||
"logprob": -8.8515625,
|
||||
"logprob": -9.0859375,
|
||||
"text": "ometric"
|
||||
},
|
||||
{
|
||||
"id": 81,
|
||||
"logprob": -0.21875,
|
||||
"logprob": -0.25585938,
|
||||
"text": "_"
|
||||
},
|
||||
{
|
||||
"id": 6009,
|
||||
"logprob": -1.2773438,
|
||||
"logprob": -2.1972656,
|
||||
"text": "mean"
|
||||
},
|
||||
{
|
||||
"id": 26,
|
||||
"logprob": -0.25195312,
|
||||
"logprob": -0.2998047,
|
||||
"text": "("
|
||||
},
|
||||
{
|
||||
"id": 62,
|
||||
"logprob": -4.8203125,
|
||||
"logprob": -5.6445312,
|
||||
"text": "L"
|
||||
},
|
||||
{
|
||||
"id": 44,
|
||||
"logprob": -3.7734375,
|
||||
"logprob": -3.0839844,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 1682,
|
||||
"logprob": -0.8310547,
|
||||
"logprob": -0.6748047,
|
||||
"text": " List"
|
||||
},
|
||||
{
|
||||
"id": 77,
|
||||
"logprob": -0.22766113,
|
||||
"logprob": -0.3864746,
|
||||
"text": "["
|
||||
},
|
||||
{
|
||||
"id": 1808,
|
||||
"logprob": -0.46240234,
|
||||
"logprob": -0.9355469,
|
||||
"text": "float"
|
||||
},
|
||||
{
|
||||
"id": 10794,
|
||||
"logprob": -3.0234375,
|
||||
"logprob": -2.5371094,
|
||||
"text": "]):"
|
||||
}
|
||||
],
|
||||
@ -69,7 +69,7 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 284,
|
||||
"logprob": -0.04626465,
|
||||
"logprob": -1.1679688,
|
||||
"special": false,
|
||||
"text": "\n "
|
||||
},
|
||||
|
@ -11,57 +11,57 @@
|
||||
},
|
||||
{
|
||||
"id": 3226,
|
||||
"logprob": -8.9453125,
|
||||
"logprob": -9.015625,
|
||||
"text": " ge"
|
||||
},
|
||||
{
|
||||
"id": 21017,
|
||||
"logprob": -8.859375,
|
||||
"logprob": -9.0859375,
|
||||
"text": "ometric"
|
||||
},
|
||||
{
|
||||
"id": 81,
|
||||
"logprob": -0.21984863,
|
||||
"logprob": -0.25585938,
|
||||
"text": "_"
|
||||
},
|
||||
{
|
||||
"id": 6009,
|
||||
"logprob": -1.2861328,
|
||||
"logprob": -2.2304688,
|
||||
"text": "mean"
|
||||
},
|
||||
{
|
||||
"id": 26,
|
||||
"logprob": -0.25219727,
|
||||
"logprob": -0.29760742,
|
||||
"text": "("
|
||||
},
|
||||
{
|
||||
"id": 62,
|
||||
"logprob": -4.8007812,
|
||||
"logprob": -5.6796875,
|
||||
"text": "L"
|
||||
},
|
||||
{
|
||||
"id": 44,
|
||||
"logprob": -3.7949219,
|
||||
"logprob": -3.0742188,
|
||||
"text": ":"
|
||||
},
|
||||
{
|
||||
"id": 1682,
|
||||
"logprob": -0.8046875,
|
||||
"logprob": -0.67626953,
|
||||
"text": " List"
|
||||
},
|
||||
{
|
||||
"id": 77,
|
||||
"logprob": -0.22424316,
|
||||
"logprob": -0.38842773,
|
||||
"text": "["
|
||||
},
|
||||
{
|
||||
"id": 1808,
|
||||
"logprob": -0.46191406,
|
||||
"logprob": -0.9165039,
|
||||
"text": "float"
|
||||
},
|
||||
{
|
||||
"id": 10794,
|
||||
"logprob": -3.0253906,
|
||||
"logprob": -2.5527344,
|
||||
"text": "]):"
|
||||
}
|
||||
],
|
||||
@ -69,7 +69,7 @@
|
||||
"tokens": [
|
||||
{
|
||||
"id": 284,
|
||||
"logprob": 0.0,
|
||||
"logprob": -0.048583984,
|
||||
"special": false,
|
||||
"text": "\n "
|
||||
},
|
||||
|
@ -1601,8 +1601,6 @@ class FlashCausalLM(Model):
|
||||
max_s = batch.max_current_length
|
||||
lm_head_indices = batch.prefill_head_indices
|
||||
|
||||
print(slots)
|
||||
|
||||
if cu_seqlen_prefill is None and self.max_past() is not None:
|
||||
# In decode, not prefill, we're actually overwriting the KV-cache
|
||||
# in a circular buffer mode.
|
||||
|
Loading…
Reference in New Issue
Block a user