diff --git a/deepsparse/logits_process.py b/deepsparse/logits_process.py new file mode 100644 index 00000000..280d37a6 --- /dev/null +++ b/deepsparse/logits_process.py @@ -0,0 +1,135 @@ +import numpy as np +from typing import List + +def softmax(x: np.ndarray, axis=None) -> np.ndarray: + x = x - x.max(axis=axis, keepdims=True) + y = np.exp(x) # TODO: make this happen in place? + return y / y.sum(axis=axis, keepdims=True) + +class LogitsWarper: + def __call__(self, scores: np.ndarray) -> np.ndarray: + raise NotImplementedError( + f"{self.__class__} is an abstract class. Only classes inheriting this class can be called." + ) + +class LogitsProcessor: + def __call__(self, input_ids: np.ndarray, scores: np.ndarray) -> np.ndarray: + raise NotImplementedError( + f"{self.__class__} is an abstract class. Only classes inheriting this class can be called." + ) + +class LogitsWarpers: + def __init__( + self, + temperature=1.0, + top_k=None, + top_p=None, + ): + self.warpers = [] + + if temperature is not None and temperature != 1.0: + self.warpers.append(TemperatureLogitsWarper(temperature=temperature)) + if top_k is not None and top_k != 0: + self.warpers.append(TopKLogitsWarper(top_k=top_k)) + if top_p is not None and top_p < 1.0: + self.warpers.append(TopPLogitsWarper(top_p=top_p)) + + def __call__(self, scores: np.ndarray) -> np.ndarray: + for warper in self.warpers: + scores = warper(scores) + return scores + +# https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L205 +# note: this handles arbitrary batch size +class TemperatureLogitsWarper(LogitsWarper): + def __init__(self, temperature: float): + if not isinstance(temperature, float) or not (temperature > 0): + except_msg = ( + f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token " + "scores will be invalid." + ) + if isinstance(temperature, float) and temperature == 0.0: + except_msg += " If you're looking for greedy decoding strategies, set `do_sample=False`." + raise ValueError(except_msg) + + self.temperature = temperature + + def __call__(self, scores: np.ndarray) -> np.ndarray: + scores = scores / self.temperature + return scores + +# https://github.com/huggingface/transformers/blob/v4.32.0/src/transformers/generation/logits_process.py#L361 +# TODO: update logic to handle b > 1 +class TopPLogitsWarper(LogitsWarper): + def __init__(self, top_p: float): + if not isinstance(top_p, float) or top_p < 0 or top_p > 1.0: + raise ValueError(f"`top_p` has to be a float > 0 and < 1, but is {top_p}") + + self.top_p = top_p + + def __call__(self, scores: np.ndarray): + # assert shape is [1, vocab_size] + assert len(scores.shape) == 2 + assert scores.shape[0] == 1 + + sorted_indices = np.argsort(scores, axis=-1) + sorted_scores = np.take_along_axis(scores, sorted_indices, axis=-1) + + # sort, grabbing all those outside top_p + cumulative_probs = softmax(sorted_scores, axis=-1).cumsum(axis=-1) + sorted_indices_to_remove = cumulative_probs <= (1 - self.top_p) + # note: this relies on b=1 + indices_to_remove = sorted_indices[sorted_indices_to_remove] + + # set removed indices logits to -Inf (never selected) + scores[:, indices_to_remove] = -float("Inf") + + return scores + +# https://github.com/huggingface/transformers/blob/v4.32.0/src/transformers/generation/logits_process.py#L433 +# note: this handles arbitrary batch size +class TopKLogitsWarper(LogitsWarper): + def __init__(self, top_k: int): + if not isinstance(top_k, int) or top_k <= 0: + raise ValueError(f"`top_k` has to be a strictly positive integer, but is {top_k}") + + self.top_k = top_k + + def __call__(self, scores: np.ndarray): + # assert shape is [batch, vocab_size] + assert len(scores.shape) == 2 + + top_k = min(self.top_k, scores.shape[-1]) + + # sort, grabbing all except the top k + indices_to_remove = np.argsort(scores, axis=-1)[:,:-top_k] + + # set removed indices logits to -Inf (never selected) + np.put_along_axis(scores, indices_to_remove, -float("Inf"), axis=-1) + + return scores + +# https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L270 +# TODO: update logic to handle b > 1 +class RepetitionPenaltyLogitsProcessor(LogitsProcessor): + def __init__(self, penalty: float): + if not isinstance(penalty, float) or not (penalty > 0): + raise ValueError(f"`penalty` has to be a strictly positive float, but is {penalty}") + + self.penalty = penalty + + def __call__(self, input_ids: np.ndarray, scores: np.ndarray) -> np.ndarray: + # assert shape is [1, vocab_size] + assert len(scores.shape) == 2 + assert scores.shape[0] == 1 + + # assert shape is [1, seq_len] + assert len(input_ids.shape) == 2 + assert input_ids.shape[0] == 1 + + score = scores[:, input_ids[0]] # torch.gather + score = np.where(score < 0, score * self.penalty, score / self.penalty) + scores[:, input_ids[0]] = score # torch.scatter + + return scores + diff --git a/deepsparse/server.py b/deepsparse/server.py new file mode 100644 index 00000000..069edabf --- /dev/null +++ b/deepsparse/server.py @@ -0,0 +1,94 @@ +import fastapi, uvicorn +from sse_starlette.sse import EventSourceResponse +from contextlib import asynccontextmanager + +from threading import Thread +from queue import Queue +from typing import Optional + +from router import DeepSparseRouter, batching_task +from utils import GenerateRequestInputs, GenerateRequestOutputs, GenerateRequest + +TOKENIZER_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/deployment" +MODEL_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/model.onnx/model.onnx" +MESSAGE_STREAM_RETRY_TIMEOUT = 15000 # milisecond + +artifacts = {} + +@asynccontextmanager +async def lifespan(app: fastapi.FastAPI): + print("\n-------------------- Building Router --------------------\n") + artifacts["router"] = DeepSparseRouter(model_path=MODEL_PATH, tokenizer_path=TOKENIZER_PATH) + + print("\n-------------------- Starting Batching Task --------------------\n") + batching_thread = Thread(target=batching_task, args=[artifacts["router"]]) + batching_thread.start() + + print("\n-------------------- Launching App --------------------\n") + yield + + print("\n-------------------- Shutting Down Batching Task --------------------\n") + artifacts["router"].stop_batching_task() + batching_thread.join() + +app = fastapi.FastAPI(lifespan=lifespan) + +@app.post("/generate") +def generate(inputs: GenerateRequestInputs) -> GenerateRequestOutputs: + # convert input to generate request + generate_request = GenerateRequest.from_gr_inputs(inputs) + + # submit request to the router + artifacts["router"].submit_request(generate_request) + + gr_outputs = GenerateRequestOutputs() + + # build response + generation = generate_request.response_stream.get() + while not generation.stopped: + gr_outputs.response_text += generation.token + generation = generate_request.response_stream.get() + + gr_outputs.finish_reason = generation.finish_reason + return gr_outputs + +@app.post("/generate_stream") +async def generate_stream(request: fastapi.Request, inputs: GenerateRequestInputs): + + # convert input to generate request + generate_request = GenerateRequest.from_gr_inputs(inputs) + + # submit request to the router + artifacts["router"].submit_request(generate_request) + + async def token_generator(): + while True: + if await request.is_disconnected(): + break + + generation = generate_request.response_stream.get() + if not generation.stopped: + yield { + "event": "token_generated", + "id": "message_id", + "retry": MESSAGE_STREAM_RETRY_TIMEOUT, + "data": generation.token + } + else: + yield { + "event": "token_generated", + "id": "message_id", + "retry": MESSAGE_STREAM_RETRY_TIMEOUT, + "data": generation.finish_reason + } + + return EventSourceResponse(token_generator()) + +if __name__ == "__main__": + uvicorn.run( + "server:app", + host="0.0.0.0", + port=5543, + workers=1, # limit to one process to avoid copying the model + # reload=True + ) \ No newline at end of file diff --git a/server-dev.ipynb b/server-dev.ipynb index 1336ddf5..964c9f3d 100644 --- a/server-dev.ipynb +++ b/server-dev.ipynb @@ -13,28 +13,187 @@ }, { "cell_type": "code", - "execution_count": 91, - "id": "7513b2a1-0749-44b5-88a9-d91d5b175e8e", + "execution_count": 108, + "id": "b5fd745b-6681-4e3d-81f2-2c721fde9318", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'http://127.0.0.1:5543/generate_stream'" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "4a6b2bf7-aa4e-45a3-b6c3-5f0e20b6a0e0", + "metadata": {}, + "outputs": [ + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[110], line 16\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msseclient\u001b[39;00m\n\u001b[1;32m 3\u001b[0m obj \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m: sequence,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgeneration_parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 13\u001b[0m }\n\u001b[1;32m 14\u001b[0m }\n\u001b[0;32m---> 16\u001b[0m stream_response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m client \u001b[38;5;241m=\u001b[39m sseclient\u001b[38;5;241m.\u001b[39mSSEClient(stream_response)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# Loop forever (while connection \"open\")\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connection.py:454\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:1377\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1375\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1377\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1378\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1379\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:320\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:281\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 281\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 282\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 283\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "import sseclient\n", + "\n", + "obj = {\n", + " \"inputs\": sequence,\n", + " \"generation_parameters\": {\n", + " \"max_new_tokens\":100,\n", + " # \"repetion_penalty\": 2.0,\n", + " # \"do_sample\": False,\n", + " # \"temperature\": 1.0,\n", + " # \"top_k\": 1,\n", + " # \"top_p\": 0.9,\n", + " # \"seed\": 43,\n", + " }\n", + " }\n", + "\n", + "stream_response = requests.post(url, json=obj, stream=True)\n", + "\n", + "client = sseclient.SSEClient(stream_response)\n", + "\n", + "# Loop forever (while connection \"open\")\n", + "for event in client.events():\n", + " print (\"got a new event from server\")\n", + " pprint.pprint(event.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "9cac401e-3a69-4f9c-b074-f1ba8e06df49", "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sseclient\n", + " Downloading sseclient-0.0.27.tar.gz (7.5 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: requests>=2.9 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from sseclient) (2.31.0)\n", + "Requirement already satisfied: six in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from sseclient) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (3.1.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (2.0.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (2023.5.7)\n", + "Building wheels for collected packages: sseclient\n", + " Building wheel for sseclient (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for sseclient: filename=sseclient-0.0.27-py3-none-any.whl size=5565 sha256=56a8fb4039994d2facce58dba0bad7a3af7f5a9569e80b70ec2adab94dc46a35\n", + " Stored in directory: /home/robertgshaw/.cache/pip/wheels/73/21/3f/9d20c05c1c9312ab61bc7bf63bb2c189a71e800cb028fa8d2a\n", + "Successfully built sseclient\n", + "Installing collected packages: sseclient\n", + "Successfully installed sseclient-0.0.27\n" + ] + } + ], + "source": [ + "!pip install sseclient" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "80272ae9-6c07-43a3-920e-4d13a5905685", + "metadata": {}, + "outputs": [ + { + "ename": "InvalidURL", + "evalue": "Failed to parse: ", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/util/url.py:425\u001b[0m, in \u001b[0;36mparse_url\u001b[0;34m(url)\u001b[0m\n\u001b[1;32m 424\u001b[0m auth \u001b[38;5;241m=\u001b[39m auth \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 425\u001b[0m host, port \u001b[38;5;241m=\u001b[39m \u001b[43m_HOST_PORT_RE\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmatch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhost_port\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m() \u001b[38;5;66;03m# type: ignore[union-attr]\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m auth \u001b[38;5;129;01mand\u001b[39;00m normalize_uri:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'groups'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mLocationParseError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:434\u001b[0m, in \u001b[0;36mPreparedRequest.prepare_url\u001b[0;34m(self, url, params)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m scheme, auth, host, port, path, query, fragment \u001b[38;5;241m=\u001b[39m \u001b[43mparse_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m LocationParseError \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/util/url.py:451\u001b[0m, in \u001b[0;36mparse_url\u001b[0;34m(url)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mAttributeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LocationParseError(source_url) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# For the sake of backwards compatibility we put empty\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;66;03m# string values for path if there are any defined values\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;66;03m# beyond the path in the URL.\u001b[39;00m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;66;03m# TODO: Remove this when we break backwards compatibility.\u001b[39;00m\n", + "\u001b[0;31mLocationParseError\u001b[0m: Failed to parse: ", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mInvalidURL\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[96], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msseclient\u001b[39;00m\u001b[38;5;241m,\u001b[39m \u001b[38;5;21;01mpprint\u001b[39;00m\n\u001b[1;32m 3\u001b[0m stream_response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(url, json\u001b[38;5;241m=\u001b[39mobj, stream\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m----> 4\u001b[0m client \u001b[38;5;241m=\u001b[39m \u001b[43msseclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSSEClient\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream_response\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Loop forever (while connection \"open\")\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m event \u001b[38;5;129;01min\u001b[39;00m client\u001b[38;5;241m.\u001b[39mevents():\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/sseclient.py:48\u001b[0m, in \u001b[0;36mSSEClient.__init__\u001b[0;34m(self, url, last_id, retry, session, chunk_size, **kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# Keep data here as it streams in\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuf \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m---> 48\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/sseclient.py:56\u001b[0m, in \u001b[0;36mSSEClient._connect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;66;03m# Use session if set. Otherwise fall back to requests module.\u001b[39;00m\n\u001b[1;32m 55\u001b[0m requester \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msession \u001b[38;5;129;01mor\u001b[39;00m requests\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp \u001b[38;5;241m=\u001b[39m \u001b[43mrequester\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequests_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp_iterator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miter_content()\n\u001b[1;32m 58\u001b[0m encoding \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp\u001b[38;5;241m.\u001b[39mapparent_encoding\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:73\u001b[0m, in \u001b[0;36mget\u001b[0;34m(url, params, **kwargs)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(url, params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 63\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request.\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:575\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;66;03m# Create the Request.\u001b[39;00m\n\u001b[1;32m 563\u001b[0m req \u001b[38;5;241m=\u001b[39m Request(\n\u001b[1;32m 564\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod\u001b[38;5;241m.\u001b[39mupper(),\n\u001b[1;32m 565\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 573\u001b[0m hooks\u001b[38;5;241m=\u001b[39mhooks,\n\u001b[1;32m 574\u001b[0m )\n\u001b[0;32m--> 575\u001b[0m prep \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 577\u001b[0m proxies \u001b[38;5;241m=\u001b[39m proxies \u001b[38;5;129;01mor\u001b[39;00m {}\n\u001b[1;32m 579\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmerge_environment_settings(\n\u001b[1;32m 580\u001b[0m prep\u001b[38;5;241m.\u001b[39murl, proxies, stream, verify, cert\n\u001b[1;32m 581\u001b[0m )\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:486\u001b[0m, in \u001b[0;36mSession.prepare_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 483\u001b[0m auth \u001b[38;5;241m=\u001b[39m get_netrc_auth(request\u001b[38;5;241m.\u001b[39murl)\n\u001b[1;32m 485\u001b[0m p \u001b[38;5;241m=\u001b[39m PreparedRequest()\n\u001b[0;32m--> 486\u001b[0m \u001b[43mp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupper\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdict_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCaseInsensitiveDict\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerged_cookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43mhooks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_hooks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhooks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhooks\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 499\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m p\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:368\u001b[0m, in \u001b[0;36mPreparedRequest.prepare\u001b[0;34m(self, method, url, headers, files, data, params, auth, cookies, hooks, json)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Prepares the entire request with the given parameters.\"\"\"\u001b[39;00m\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_method(method)\n\u001b[0;32m--> 368\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_headers(headers)\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_cookies(cookies)\n", + "File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:436\u001b[0m, in \u001b[0;36mPreparedRequest.prepare_url\u001b[0;34m(self, url, params)\u001b[0m\n\u001b[1;32m 434\u001b[0m scheme, auth, host, port, path, query, fragment \u001b[38;5;241m=\u001b[39m parse_url(url)\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m LocationParseError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 436\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidURL(\u001b[38;5;241m*\u001b[39me\u001b[38;5;241m.\u001b[39margs)\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scheme:\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MissingSchema(\n\u001b[1;32m 440\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid URL \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: No scheme supplied. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 441\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPerhaps you meant https://\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 442\u001b[0m )\n", + "\u001b[0;31mInvalidURL\u001b[0m: Failed to parse: " + ] + } + ], + "source": [ + "import sseclient, pprint\n", + "\n", + "stream_response = requests.get(url, json=obj, stream=True)\n", + "client = sseclient.SSEClient(stream_response)\n", + "\n", + "# Loop forever (while connection \"open\")\n", + "for event in client.events():\n", + " print (\"got a new event from server\")\n", + " pprint.pprint(event.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "7513b2a1-0749-44b5-88a9-d91d5b175e8e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Exception in thread Thread-179:\n", + "Traceback (most recent call last):\n", + " File \"/home/robertgshaw/.conda/envs/dscb/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n", + " self.run()\n", + " File \"/home/robertgshaw/.conda/envs/dscb/lib/python3.9/threading.py\", line 917, in run\n", + " self._target(*self._args, **self._kwargs)\n", + " File \"/tmp/ipykernel_9240/1133841396.py\", line 26, in request_task\n", + "TypeError: 'NoneType' object is not subscriptable\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "55\n", - "{'response_text': '\\n if n == 0:\\n return 0\\n elif n == 1:\\n return 1\\n else:\\n return fib(n-1) + fib(n-2)\\n\\n# Driver function to test above function\\nn', 'finish_reason': 1}\n", - "Finish the following function for computing a fibonacci sequence: \n", - "\n", - "def fib(n):\n", - " if n == 0:\n", - " return 0\n", - " elif n == 1:\n", - " return 1\n", - " else:\n", - " return fib(n-1) + fib(n-2)\n", - "\n", - "# Driver function to test above function\n", - "n\n" + "None\n" ] } ], @@ -43,7 +202,7 @@ "from threading import Thread\n", "import json\n", "\n", - "url = \"http://127.0.0.1:5543/generate\"\n", + "url = \"http://127.0.0.1:5543/generate_stream\"\n", "sequence = \"Finish the following function for computing a fibonacci sequence: \\n\\ndef fib(n):\"\n", "# sequence = \"def fib(n):\"\n", "\n",