From a973cf49227ead5c71a2b8f9a482c9693cee0201 Mon Sep 17 00:00:00 2001
From: rsnm2
Date: Thu, 24 Aug 2023 18:37:03 +0000
Subject: [PATCH] moved files

---
 README.md                                   |  7 ++++
 deepsparse/main.py                          | 42 +++++++++++++++++++
 {server/deepsparse => deepsparse}/router.py | 26 ++++++++++--
 .../service/causal_lm.py                    |  0
 .../service/model.py                        |  0
 .../service/service.py                      |  0
 {server/deepsparse => deepsparse}/utils.py  |  0
 server/deepsparse/server.py                 |  0
 8 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 deepsparse/main.py
 rename {server/deepsparse => deepsparse}/router.py (89%)
 rename {server/deepsparse => deepsparse}/service/causal_lm.py (100%)
 rename {server/deepsparse => deepsparse}/service/model.py (100%)
 rename {server/deepsparse => deepsparse}/service/service.py (100%)
 rename {server/deepsparse => deepsparse}/utils.py (100%)
 delete mode 100644 server/deepsparse/server.py

diff --git a/README.md b/README.md
index e2bae362..91167bbc 100644
--- a/README.md
+++ b/README.md
@@ -61,4 +61,11 @@ python3 server/text_generation_server/cli.py serve bigscience/bloom-560m
 Launch Router
 ```shell
 make router-dev
+```
+
+Install FastAPI/Uvicorn
+
+```shell
+pip install fastapi
+pip install "uvicorn[standard]"
 ```
\ No newline at end of file
diff --git a/deepsparse/main.py b/deepsparse/main.py
new file mode 100644
index 00000000..87ebaef9
--- /dev/null
+++ b/deepsparse/main.py
@@ -0,0 +1,42 @@
+import uvicorn, fastapi
+from threading import Thread
+from queue import Queue
+
+from router import DeepSparseRouter, batching_task
+from utils import GenerateRequest
+
+TOKENIZER_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/deployment"
+MODEL_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/model.onnx/model.onnx"
+
+# setup router
+router = DeepSparseRouter(
+    model_path=MODEL_PATH,
+    tokenizer_path=TOKENIZER_PATH
+)
+
+# start background routing task
+batching_thread = Thread(target=batching_task, args=[router])
+batching_thread.start()
+
+app = fastapi.FastAPI()
+
+@app.post("/generate")
+def generate(prompt: str, max_generated_tokens: int):
+    response_stream = Queue()
+
+    # submit request to the router
+    router.submit_request(
+        generate_request=GenerateRequest(
+            prompt=prompt,
+            max_generated_tokens=max_generated_tokens,
+            response_stream=response_stream
+        )
+    )
+
+    response_string = prompt
+    generation = response_stream.get()
+    while not generation.stopped:
+        response_string += generation.token
+        generation = response_stream.get()
+
+    return response_string
\ No newline at end of file
diff --git a/server/deepsparse/router.py b/deepsparse/router.py
similarity index 89%
rename from server/deepsparse/router.py
rename to deepsparse/router.py
index 950bea68..2824bc61 100644
--- a/server/deepsparse/router.py
+++ b/deepsparse/router.py
@@ -1,6 +1,7 @@
 from queue import Queue
 from typing import List, Dict, Optional, Tuple
 from server.deepsparse.service.service import DeepSparseService
+from server.deepsparse.service.causal_lm import DeepSparseCausalLM
 from server.deepsparse.utils import CachedBatch, Batch, Generation, GenerateRequest, Request
 
 # TODO: implement logic for maximum size of the queue based on memory usage
@@ -47,11 +48,30 @@ class DeepSparseQueue:
         return (batch, generate_requests)
 
 class DeepSparseRouter:
-    def __init__(self, service: DeepSparseService):
-        self.service: DeepSparseService = service
+    def __init__(
+        self,
+        service: Optional[DeepSparseService] = None,
+        model_path: Optional[str] = None,
+        tokenizer_path: Optional[str] = None
+    ):
+        assert (
+            service is not None or
+            (model_path is not None and tokenizer_path is not None)
+        )
+
+        if service is not None:
+            self.service = service
+        else:
+            self.service = DeepSparseService(
+                model=DeepSparseCausalLM(
+                    model_path=model_path,
+                    tokenizer_path=tokenizer_path
+                )
+            )
+
         self.queue: DeepSparseQueue = DeepSparseQueue()
 
-    def generate(self, generate_request: GenerateRequest):
+    def submit_request(self, generate_request: GenerateRequest):
         self.queue.append(generate_request)
 
     def prefill(
diff --git a/server/deepsparse/service/causal_lm.py b/deepsparse/service/causal_lm.py
similarity index 100%
rename from server/deepsparse/service/causal_lm.py
rename to deepsparse/service/causal_lm.py
diff --git a/server/deepsparse/service/model.py b/deepsparse/service/model.py
similarity index 100%
rename from server/deepsparse/service/model.py
rename to deepsparse/service/model.py
diff --git a/server/deepsparse/service/service.py b/deepsparse/service/service.py
similarity index 100%
rename from server/deepsparse/service/service.py
rename to deepsparse/service/service.py
diff --git a/server/deepsparse/utils.py b/deepsparse/utils.py
similarity index 100%
rename from server/deepsparse/utils.py
rename to deepsparse/utils.py
diff --git a/server/deepsparse/server.py b/server/deepsparse/server.py
deleted file mode 100644
index e69de29b..00000000
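
A minimal client sketch for exercising the new `/generate` route once the patch is applied. It assumes the app is launched from the `deepsparse/` directory with `uvicorn main:app --port 8000`, and relies on FastAPI's default behavior of mapping bare `str`/`int` handler parameters to query parameters; the host, port, and prompt here are illustrative, not part of the patch.

```python
# Hypothetical client for the /generate endpoint added in this patch.
# Assumes the server is running locally via: uvicorn main:app --port 8000
import requests

response = requests.post(
    "http://localhost:8000/generate",
    # prompt and max_generated_tokens travel as query parameters because the
    # handler declares them as bare str/int rather than a request body model.
    params={
        "prompt": "def fibonacci(n):",
        "max_generated_tokens": 64,
    },
)
response.raise_for_status()
print(response.json())  # the prompt plus the generated tokens, as one string
```

Note that the request blocks until the router's batching thread drains the response queue for this prompt, so long generations hold the HTTP connection open for the full decode.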