From 1700d1190552e42761927a3227d6745ece84872c Mon Sep 17 00:00:00 2001 From: rsnm2 Date: Fri, 1 Sep 2023 18:29:36 +0000 Subject: [PATCH] README: fix install extra name, add server launch and request example --- README.md | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 96e72c53..bdc6df9e 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,59 @@ Install: ```bash -pip3 install deepsparse-nightly[transformer] fastapi uvicorn +pip3 install deepsparse-nightly[transformers] fastapi uvicorn ``` Download model: - ```bash sparsezoo.download zoo:nlg/text_generation/codegen_multi-350m/pytorch/huggingface/bigquery_thepile/base_quant-none --save-dir codegen-quant +``` + +Launch server: +```bash +python3 deepsparse/server.py --deployment-dir ./codegen-quant/deployment +``` + +Make requests: +```python +import requests +from threading import Thread +import json + +url = "http://127.0.0.1:5543/generate" +sequence = "Write a function for computing a fibonacci sequence: \n\ndef fib(n):" +# sequence = "def fib(n):" + +def request_task(max_new_tokens): + obj = { + "inputs": sequence, + "generation_parameters": { + "max_new_tokens":max_new_tokens, + # "repetition_penalty": 1.1, + # "do_sample": True, + # "temperature": 1.1, + # "top_k": 3, + # "top_p": 0.9, + # "seed": 42, + } + } + with requests.post(url, json=obj) as r: + print(max_new_tokens) + dct = json.loads(r.text) + # print(dct) + print(f'{sequence}{dct["response_text"]}') + +max_new_tokens_lst = [100, 50, 25] +request_ts = [ + Thread(target=request_task, args=[max_new_tokens]) + for max_new_tokens in max_new_tokens_lst +] + +import time +for request_t in request_ts: + request_t.start() + time.sleep(0.1) + +for request_t in request_ts: + request_t.join() ``` \ No newline at end of file