diff --git a/integration-tests/models/__snapshots__/test_mpt/test_mpt.json b/integration-tests/models/__snapshots__/test_mpt/test_mpt.json new file mode 100644 index 00000000..abbbf03c --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mpt/test_mpt.json @@ -0,0 +1,140 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 17, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5117188, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.96875, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.953125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.94189453, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 428, + "logprob": -1.5830078, + "special": false, + "text": " -" + }, + { + "id": 18147, + "logprob": -3.3105469, + "special": false, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -0.3215332, + "special": false, + "text": " Learning" + }, + { + "id": 187, + "logprob": -2.5566406, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.6074219, + "special": false, + "text": "Deep" + }, + { + "id": 20727, + "logprob": -0.69628906, + "special": false, + "text": " Learning" + }, + { + "id": 310, + "logprob": -0.6923828, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.5263672, + "special": false, + "text": " a" + }, + { + "id": 749, + "logprob": -1.8544922, + "special": false, + "text": " sub" + }, + { + "id": 3423, + "logprob": -0.6118164, + "special": false, + "text": "field" + }, + { + "id": 273, + "logprob": -0.055877686, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.0537109, + "special": false, + "text": " machine" + }, + { + "id": 4715, + "logprob": -0.0115737915, + "special": false, + "text": " learning" + }, + { + "id": 326, + "logprob": -0.9111328, + "special": false, + "text": " that" + }, + { + "id": 4648, + "logprob": -1.4589844, + "special": false, + "text": " uses" + }, + { + "id": 13345, + "logprob": -1.4853516, + "special": false, + "text": " artificial" + }, + { + "id": 11454, + "logprob": -0.021636963, + "special": false, + "text": " neural" + } + ] + }, + "generated_text": " - Deep Learning\nDeep Learning is a subfield of machine learning that uses artificial neural" +} diff --git a/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json b/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json new file mode 100644 index 00000000..e3bc57ed --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json @@ -0,0 +1,562 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 17, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5117188, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.96875, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.953125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.94189453, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 428, + "logprob": -1.5830078, + "special": false, + "text": " -" + }, + { + "id": 18147, + "logprob": -3.3183594, + "special": false, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -0.32617188, + "special": false, + "text": " Learning" + }, + { + "id": 187, + "logprob": -2.5742188, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.6015625, + "special": false, + "text": "Deep" + }, + { + "id": 20727, + "logprob": -0.69628906, + "special": false, + "text": " Learning" + }, + { + "id": 310, + "logprob": -0.67822266, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.5395508, + "special": false, + "text": " a" + }, + { + "id": 749, + "logprob": -1.8623047, + "special": false, + "text": " sub" + }, + { + "id": 3423, + "logprob": -0.6020508, + "special": false, + "text": "field" + }, + { + "id": 273, + "logprob": -0.0552063, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.0742188, + "special": false, + "text": " machine" + }, + { + "id": 4715, + "logprob": -0.011405945, + "special": false, + "text": " learning" + }, + { + "id": 326, + "logprob": -0.9165039, + "special": false, + "text": " that" + }, + { + "id": 4648, + "logprob": -1.4501953, + "special": false, + "text": " uses" + }, + { + "id": 13345, + "logprob": -1.4960938, + "special": false, + "text": " artificial" + }, + { + "id": 11454, + "logprob": -0.02116394, + "special": false, + "text": " neural" + } + ] + }, + "generated_text": " - Deep Learning\nDeep Learning is a subfield of machine learning that uses artificial neural" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 17, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 428, + "logprob": -1.5800781, + "special": false, + "text": " -" + }, + { + "id": 18147, + "logprob": -3.3242188, + "special": false, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -0.31835938, + "special": false, + "text": " Learning" + }, + { + "id": 187, + "logprob": -2.5644531, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.5957031, + "special": false, + "text": "Deep" + }, + { + "id": 20727, + "logprob": -0.69628906, + "special": false, + "text": " Learning" + }, + { + "id": 310, + "logprob": -0.68603516, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.5258789, + "special": false, + "text": " a" + }, + { + "id": 749, + "logprob": -1.859375, + "special": false, + "text": " sub" + }, + { + "id": 3423, + "logprob": -0.6166992, + "special": false, + "text": "field" + }, + { + "id": 273, + "logprob": -0.056762695, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.0703125, + "special": false, + "text": " machine" + }, + { + "id": 4715, + "logprob": -0.011428833, + "special": false, + "text": " learning" + }, + { + "id": 326, + "logprob": -0.9213867, + "special": false, + "text": " that" + }, + { + "id": 4648, + "logprob": -1.4726562, + "special": false, + "text": " uses" + }, + { + "id": 13345, + "logprob": -1.5039062, + "special": false, + "text": " artificial" + }, + { + "id": 11454, + "logprob": -0.021652222, + "special": false, + "text": " neural" + } + ] + }, + "generated_text": " - Deep Learning\nDeep Learning is a subfield of machine learning that uses artificial neural" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 17, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 428, + "logprob": -1.5800781, + "special": false, + "text": " -" + }, + { + "id": 18147, + "logprob": -3.3242188, + "special": false, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -0.31835938, + "special": false, + "text": " Learning" + }, + { + "id": 187, + "logprob": -2.5644531, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.5957031, + "special": false, + "text": "Deep" + }, + { + "id": 20727, + "logprob": -0.69628906, + "special": false, + "text": " Learning" + }, + { + "id": 310, + "logprob": -0.68603516, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.5258789, + "special": false, + "text": " a" + }, + { + "id": 749, + "logprob": -1.859375, + "special": false, + "text": " sub" + }, + { + "id": 3423, + "logprob": -0.6166992, + "special": false, + "text": "field" + }, + { + "id": 273, + "logprob": -0.056762695, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.0703125, + "special": false, + "text": " machine" + }, + { + "id": 4715, + "logprob": -0.011428833, + "special": false, + "text": " learning" + }, + { + "id": 326, + "logprob": -0.9213867, + "special": false, + "text": " that" + }, + { + "id": 4648, + "logprob": -1.4726562, + "special": false, + "text": " uses" + }, + { + "id": 13345, + "logprob": -1.5039062, + "special": false, + "text": " artificial" + }, + { + "id": 11454, + "logprob": -0.021652222, + "special": false, + "text": " neural" + } + ] + }, + "generated_text": " - Deep Learning\nDeep Learning is a subfield of machine learning that uses artificial neural" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 17, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 428, + "logprob": -1.5800781, + "special": false, + "text": " -" + }, + { + "id": 18147, + "logprob": -3.3242188, + "special": false, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -0.31835938, + "special": false, + "text": " Learning" + }, + { + "id": 187, + "logprob": -2.5644531, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.5957031, + "special": false, + "text": "Deep" + }, + { + "id": 20727, + "logprob": -0.69628906, + "special": false, + "text": " Learning" + }, + { + "id": 310, + "logprob": -0.68603516, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.5258789, + "special": false, + "text": " a" + }, + { + "id": 749, + "logprob": -1.859375, + "special": false, + "text": " sub" + }, + { + "id": 3423, + "logprob": -0.6166992, + "special": false, + "text": "field" + }, + { + "id": 273, + "logprob": -0.056762695, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.0703125, + "special": false, + "text": " machine" + }, + { + "id": 4715, + "logprob": -0.011428833, + "special": false, + "text": " learning" + }, + { + "id": 326, + "logprob": -0.9213867, + "special": false, + "text": " that" + }, + { + "id": 4648, + "logprob": -1.4726562, + "special": false, + "text": " uses" + }, + { + "id": 13345, + "logprob": -1.5039062, + "special": false, + "text": " artificial" + }, + { + "id": 11454, + "logprob": -0.021652222, + "special": false, + "text": " neural" + } + ] + }, + "generated_text": " - Deep Learning\nDeep Learning is a subfield of machine learning that uses artificial neural" + } +] diff --git a/server/text_generation_server/models/mpt.py b/server/text_generation_server/models/mpt.py index b38f6218..d3d127c1 100644 --- a/server/text_generation_server/models/mpt.py +++ b/server/text_generation_server/models/mpt.py @@ -1,13 +1,15 @@ import torch import torch.distributed +from typing import Optional, Type from opentelemetry import trace -from transformers import AutoTokenizer, PretrainedConfig -from typing import Optional +from transformers import AutoTokenizer, PretrainedConfig, PreTrainedTokenizerBase from huggingface_hub import hf_hub_download import json from text_generation_server.models import CausalLM +from text_generation_server.models.causal_lm import CausalLMBatch +from text_generation_server.pb import generate_pb2 from text_generation_server.models.custom_modeling.mpt_modeling import ( MPTForCausalLM, ) @@ -20,6 +22,20 @@ from text_generation_server.utils import ( tracer = trace.get_tracer(__name__) +class MPTCausalLMBatch(CausalLMBatch): + @classmethod + def from_pb( + cls, + pb: generate_pb2.Batch, + tokenizer: PreTrainedTokenizerBase, + dtype: torch.dtype, + device: torch.device, + ) -> "CausalLMBatch": + batch = super().from_pb(pb=pb, tokenizer=tokenizer, dtype=dtype, device=device) + batch.keys_head_dim_last = False + return batch + + class MPTSharded(CausalLM): def __init__( self, @@ -72,3 +88,7 @@ class MPTSharded(CausalLM): rank=rank, world_size=world_size, ) + + @property + def batch_type(self) -> Type[CausalLMBatch]: + return MPTCausalLMBatch