moved files

This commit is contained in:
rsnm2 2023-08-24 18:37:03 +00:00
parent cd3349f53b
commit a973cf4922
8 changed files with 72 additions and 3 deletions

View File

@@ -61,4 +61,11 @@ python3 server/text_generation_server/cli.py serve bigscience/bloom-560m
Launch Router
```shell
make router-dev
```
Install FastAPI/Uvicorn
```shell
pip install fastapi
pip install "uvicorn[standard]"
```

42
deepsparse/main.py Normal file
View File

@@ -0,0 +1,42 @@
import uvicorn, fastapi
from threading import Thread
from queue import Queue
from router import DeepSparseRouter, batching_task
from utils import GenerateRequest
TOKENIZER_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/deployment"
MODEL_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/model.onnx/model.onnx"
# setup router
router = DeepSparseRouter(
model_path=MODEL_PATH,
tokenizer_path=TOKENIZER_PATH
)
# start background routing task
batching_thread = Thread(target=batching_task, args=[router])
batching_thread.start()
app = fastapi.FastAPI()
@app.post("/generate")
def generate(prompt:str, max_generated_tokens:int):
response_stream = Queue()
# submit request to the router
router.submit_request(
generate_request=GenerateRequest(
prompt=prompt,
max_generated_tokens=max_generated_tokens,
response_stream=response_stream
)
)
response_string = prompt
generation = response_stream.get()
while not generation.stopped:
response_string += generation.token
generation = response_stream.get()
return generation

View File

@@ -1,6 +1,7 @@
from queue import Queue
from typing import List, Dict, Optional, Tuple
from server.deepsparse.service.service import DeepSparseService
from server.deepsparse.service.causal_lm import DeepSparseCausalLM
from server.deepsparse.utils import CachedBatch, Batch, Generation, GenerateRequest, Request
# TODO: implement logic for maximum size of the queue based on memory usage
@@ -47,11 +48,30 @@ class DeepSparseQueue:
return (batch, generate_requests)
class DeepSparseRouter:
def __init__(self, service: DeepSparseService):
self.service: DeepSparseService = service
def __init__(
self,
service: Optional[DeepSparseService],
model_path: Optional[str],
tokenizer_path: Optional[str]
):
assert (
service is not None or
(model_path is not None and tokenizer_path is not None)
)
if service is not None:
self.service = service
else:
self.service = DeepSparseService(
model = DeepSparseCausalLM(
model_path=model_path,
tokenizer_path=tokenizer_path
)
)
self.queue: DeepSparseQueue = DeepSparseQueue()
def generate(self, generate_request: GenerateRequest):
def submit_request(self, generate_request: GenerateRequest):
self.queue.append(generate_request)
def prefill(