moved files

rsnm2 2023-08-24 18:37:03 +00:00
parent cd3349f53b
commit a973cf4922
8 changed files with 72 additions and 3 deletions


@@ -62,3 +62,10 @@ Launch Router
```shell
make router-dev
```
Install FastAPI/Uvicorn
```shell
pip install fastapi
pip install "uvicorn[standard]"
```
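With FastAPI and Uvicorn installed, the new server can be started either with the `uvicorn` CLI or programmatically. A minimal sketch of the programmatic route, assuming the FastAPI app in `deepsparse/main.py` is exposed as `app` (the host/port values and the `main:app` import string are assumptions, not part of this commit):

```python
# minimal launch sketch; host/port are illustrative defaults, not values from this repo
import uvicorn

if __name__ == "__main__":
    # "main:app" refers to the module main.py and its FastAPI instance `app`
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
```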

deepsparse/main.py Normal file

@@ -0,0 +1,42 @@
import fastapi
import uvicorn
from threading import Thread
from queue import Queue

from router import DeepSparseRouter, batching_task
from utils import GenerateRequest

TOKENIZER_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/deployment"
MODEL_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/model.onnx/model.onnx"

# setup router
router = DeepSparseRouter(
    model_path=MODEL_PATH,
    tokenizer_path=TOKENIZER_PATH
)

# start background routing task
batching_thread = Thread(target=batching_task, args=[router])
batching_thread.start()

app = fastapi.FastAPI()

@app.post("/generate")
def generate(prompt: str, max_generated_tokens: int):
    response_stream = Queue()

    # submit request to the router
    router.submit_request(
        generate_request=GenerateRequest(
            prompt=prompt,
            max_generated_tokens=max_generated_tokens,
            response_stream=response_stream
        )
    )

    # accumulate generated tokens until the router marks the request as stopped
    response_string = prompt
    generation = response_stream.get()
    while not generation.stopped:
        response_string += generation.token
        generation = response_stream.get()

    # return the full text: prompt plus generated continuation
    return response_string
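Because `prompt` and `max_generated_tokens` are declared as plain function parameters, FastAPI exposes them as query parameters on the POST endpoint. A hedged client-side sketch of exercising it (the URL, port, and example prompt are assumptions):

```python
import requests

# assumes the server from deepsparse/main.py is running locally on port 8000
response = requests.post(
    "http://localhost:8000/generate",
    params={
        "prompt": "def fibonacci(n):",
        "max_generated_tokens": 64,
    },
)
print(response.json())  # prompt plus the generated continuation
```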


@@ -1,6 +1,7 @@
from queue import Queue
from typing import List, Dict, Optional, Tuple
from server.deepsparse.service.service import DeepSparseService
from server.deepsparse.service.causal_lm import DeepSparseCausalLM
from server.deepsparse.utils import CachedBatch, Batch, Generation, GenerateRequest, Request

# TODO: implement logic for maximum size of the queue based on memory usage
@@ -47,11 +48,30 @@ class DeepSparseQueue:
        return (batch, generate_requests)

class DeepSparseRouter:
    def __init__(
        self,
        service: Optional[DeepSparseService] = None,
        model_path: Optional[str] = None,
        tokenizer_path: Optional[str] = None
    ):
        # either a prebuilt service or a model/tokenizer path pair must be provided
        assert (
            service is not None or
            (model_path is not None and tokenizer_path is not None)
        )

        if service is not None:
            self.service = service
        else:
            self.service = DeepSparseService(
                model=DeepSparseCausalLM(
                    model_path=model_path,
                    tokenizer_path=tokenizer_path
                )
            )

        self.queue: DeepSparseQueue = DeepSparseQueue()

    def submit_request(self, generate_request: GenerateRequest):
        self.queue.append(generate_request)

    def prefill(
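With this change, `DeepSparseRouter` can be built either around an existing `DeepSparseService` or directly from model and tokenizer paths, and requests now enter the router via `submit_request` (previously `generate`). A rough sketch of both construction paths, assuming the import locations shown and with placeholder file paths:

```python
from server.deepsparse.service.service import DeepSparseService
from server.deepsparse.service.causal_lm import DeepSparseCausalLM
from server.deepsparse.router import DeepSparseRouter  # module path assumed

# 1) wrap a service that was constructed elsewhere
service = DeepSparseService(
    model=DeepSparseCausalLM(
        model_path="/path/to/model.onnx",      # placeholder path
        tokenizer_path="/path/to/deployment",  # placeholder path
    )
)
router = DeepSparseRouter(service=service)

# 2) let the router build the service itself from paths (new in this commit)
router = DeepSparseRouter(
    model_path="/path/to/model.onnx",
    tokenizer_path="/path/to/deployment",
)
```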