mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
added files
This commit is contained in:
parent
d02993a5c3
commit
77ac1648c5
135
deepsparse/logits_process.py
Normal file
135
deepsparse/logits_process.py
Normal file
@ -0,0 +1,135 @@
|
||||
import numpy as np
|
||||
from typing import List
|
||||
|
||||
def softmax(x: np.ndarray, axis=None) -> np.ndarray:
|
||||
x = x - x.max(axis=axis, keepdims=True)
|
||||
y = np.exp(x) # TODO: make this happen in place?
|
||||
return y / y.sum(axis=axis, keepdims=True)
|
||||
|
||||
class LogitsWarper:
|
||||
def __call__(self, scores: np.ndarray) -> np.ndarray:
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
|
||||
)
|
||||
|
||||
class LogitsProcessor:
|
||||
def __call__(self, input_ids: np.ndarray, scores: np.ndarray) -> np.ndarray:
|
||||
raise NotImplementedError(
|
||||
f"{self.__class__} is an abstract class. Only classes inheriting this class can be called."
|
||||
)
|
||||
|
||||
class LogitsWarpers:
|
||||
def __init__(
|
||||
self,
|
||||
temperature=1.0,
|
||||
top_k=None,
|
||||
top_p=None,
|
||||
):
|
||||
self.warpers = []
|
||||
|
||||
if temperature is not None and temperature != 1.0:
|
||||
self.warpers.append(TemperatureLogitsWarper(temperature=temperature))
|
||||
if top_k is not None and top_k != 0:
|
||||
self.warpers.append(TopKLogitsWarper(top_k=top_k))
|
||||
if top_p is not None and top_p < 1.0:
|
||||
self.warpers.append(TopPLogitsWarper(top_p=top_p))
|
||||
|
||||
def __call__(self, scores: np.ndarray) -> np.ndarray:
|
||||
for warper in self.warpers:
|
||||
scores = warper(scores)
|
||||
return scores
|
||||
|
||||
# https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L205
|
||||
# note: this handles arbitrary batch size
|
||||
class TemperatureLogitsWarper(LogitsWarper):
|
||||
def __init__(self, temperature: float):
|
||||
if not isinstance(temperature, float) or not (temperature > 0):
|
||||
except_msg = (
|
||||
f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token "
|
||||
"scores will be invalid."
|
||||
)
|
||||
if isinstance(temperature, float) and temperature == 0.0:
|
||||
except_msg += " If you're looking for greedy decoding strategies, set `do_sample=False`."
|
||||
raise ValueError(except_msg)
|
||||
|
||||
self.temperature = temperature
|
||||
|
||||
def __call__(self, scores: np.ndarray) -> np.ndarray:
|
||||
scores = scores / self.temperature
|
||||
return scores
|
||||
|
||||
# https://github.com/huggingface/transformers/blob/v4.32.0/src/transformers/generation/logits_process.py#L361
|
||||
# TODO: update logic to handle b > 1
|
||||
class TopPLogitsWarper(LogitsWarper):
|
||||
def __init__(self, top_p: float):
|
||||
if not isinstance(top_p, float) or top_p < 0 or top_p > 1.0:
|
||||
raise ValueError(f"`top_p` has to be a float > 0 and < 1, but is {top_p}")
|
||||
|
||||
self.top_p = top_p
|
||||
|
||||
def __call__(self, scores: np.ndarray):
|
||||
# assert shape is [1, vocab_size]
|
||||
assert len(scores.shape) == 2
|
||||
assert scores.shape[0] == 1
|
||||
|
||||
sorted_indices = np.argsort(scores, axis=-1)
|
||||
sorted_scores = np.take_along_axis(scores, sorted_indices, axis=-1)
|
||||
|
||||
# sort, grabbing all those outside top_p
|
||||
cumulative_probs = softmax(sorted_scores, axis=-1).cumsum(axis=-1)
|
||||
sorted_indices_to_remove = cumulative_probs <= (1 - self.top_p)
|
||||
# note: this relies on b=1
|
||||
indices_to_remove = sorted_indices[sorted_indices_to_remove]
|
||||
|
||||
# set removed indices logits to -Inf (never selected)
|
||||
scores[:, indices_to_remove] = -float("Inf")
|
||||
|
||||
return scores
|
||||
|
||||
# https://github.com/huggingface/transformers/blob/v4.32.0/src/transformers/generation/logits_process.py#L433
|
||||
# note: this handles arbitrary batch size
|
||||
class TopKLogitsWarper(LogitsWarper):
|
||||
def __init__(self, top_k: int):
|
||||
if not isinstance(top_k, int) or top_k <= 0:
|
||||
raise ValueError(f"`top_k` has to be a strictly positive integer, but is {top_k}")
|
||||
|
||||
self.top_k = top_k
|
||||
|
||||
def __call__(self, scores: np.ndarray):
|
||||
# assert shape is [batch, vocab_size]
|
||||
assert len(scores.shape) == 2
|
||||
|
||||
top_k = min(self.top_k, scores.shape[-1])
|
||||
|
||||
# sort, grabbing all except the top k
|
||||
indices_to_remove = np.argsort(scores, axis=-1)[:,:-top_k]
|
||||
|
||||
# set removed indices logits to -Inf (never selected)
|
||||
np.put_along_axis(scores, indices_to_remove, -float("Inf"), axis=-1)
|
||||
|
||||
return scores
|
||||
|
||||
# https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L270
|
||||
# TODO: update logic to handle b > 1
|
||||
class RepetitionPenaltyLogitsProcessor(LogitsProcessor):
|
||||
def __init__(self, penalty: float):
|
||||
if not isinstance(penalty, float) or not (penalty > 0):
|
||||
raise ValueError(f"`penalty` has to be a strictly positive float, but is {penalty}")
|
||||
|
||||
self.penalty = penalty
|
||||
|
||||
def __call__(self, input_ids: np.ndarray, scores: np.ndarray) -> np.ndarray:
|
||||
# assert shape is [1, vocab_size]
|
||||
assert len(scores.shape) == 2
|
||||
assert scores.shape[0] == 1
|
||||
|
||||
# assert shape is [1, seq_len]
|
||||
assert len(input_ids.shape) == 2
|
||||
assert input_ids.shape[0] == 1
|
||||
|
||||
score = scores[:, input_ids[0]] # torch.gather
|
||||
score = np.where(score < 0, score * self.penalty, score / self.penalty)
|
||||
scores[:, input_ids[0]] = score # torch.scatter
|
||||
|
||||
return scores
|
||||
|
94
deepsparse/server.py
Normal file
94
deepsparse/server.py
Normal file
@ -0,0 +1,94 @@
|
||||
import fastapi, uvicorn
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from threading import Thread
|
||||
from queue import Queue
|
||||
from typing import Optional
|
||||
|
||||
from router import DeepSparseRouter, batching_task
|
||||
from utils import GenerateRequestInputs, GenerateRequestOutputs, GenerateRequest
|
||||
|
||||
TOKENIZER_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/deployment"
|
||||
MODEL_PATH = "/home/robertgshaw/.cache/sparsezoo/neuralmagic/codegen_mono-350m-bigpython_bigquery_thepile-base/model.onnx/model.onnx"
|
||||
MESSAGE_STREAM_RETRY_TIMEOUT = 15000 # milisecond
|
||||
|
||||
artifacts = {}
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: fastapi.FastAPI):
|
||||
print("\n-------------------- Building Router --------------------\n")
|
||||
artifacts["router"] = DeepSparseRouter(model_path=MODEL_PATH, tokenizer_path=TOKENIZER_PATH)
|
||||
|
||||
print("\n-------------------- Starting Batching Task --------------------\n")
|
||||
batching_thread = Thread(target=batching_task, args=[artifacts["router"]])
|
||||
batching_thread.start()
|
||||
|
||||
print("\n-------------------- Launching App --------------------\n")
|
||||
yield
|
||||
|
||||
print("\n-------------------- Shutting Down Batching Task --------------------\n")
|
||||
artifacts["router"].stop_batching_task()
|
||||
batching_thread.join()
|
||||
|
||||
app = fastapi.FastAPI(lifespan=lifespan)
|
||||
|
||||
@app.post("/generate")
|
||||
def generate(inputs: GenerateRequestInputs) -> GenerateRequestOutputs:
|
||||
# convert input to generate request
|
||||
generate_request = GenerateRequest.from_gr_inputs(inputs)
|
||||
|
||||
# submit request to the router
|
||||
artifacts["router"].submit_request(generate_request)
|
||||
|
||||
gr_outputs = GenerateRequestOutputs()
|
||||
|
||||
# build response
|
||||
generation = generate_request.response_stream.get()
|
||||
while not generation.stopped:
|
||||
gr_outputs.response_text += generation.token
|
||||
generation = generate_request.response_stream.get()
|
||||
|
||||
gr_outputs.finish_reason = generation.finish_reason
|
||||
return gr_outputs
|
||||
|
||||
@app.post("/generate_stream")
|
||||
async def generate_stream(request: fastapi.Request, inputs: GenerateRequestInputs):
|
||||
|
||||
# convert input to generate request
|
||||
generate_request = GenerateRequest.from_gr_inputs(inputs)
|
||||
|
||||
# submit request to the router
|
||||
artifacts["router"].submit_request(generate_request)
|
||||
|
||||
async def token_generator():
|
||||
while True:
|
||||
if await request.is_disconnected():
|
||||
break
|
||||
|
||||
generation = generate_request.response_stream.get()
|
||||
if not generation.stopped:
|
||||
yield {
|
||||
"event": "token_generated",
|
||||
"id": "message_id",
|
||||
"retry": MESSAGE_STREAM_RETRY_TIMEOUT,
|
||||
"data": generation.token
|
||||
}
|
||||
else:
|
||||
yield {
|
||||
"event": "token_generated",
|
||||
"id": "message_id",
|
||||
"retry": MESSAGE_STREAM_RETRY_TIMEOUT,
|
||||
"data": generation.finish_reason
|
||||
}
|
||||
|
||||
return EventSourceResponse(token_generator())
|
||||
|
||||
if __name__ == "__main__":
|
||||
uvicorn.run(
|
||||
"server:app",
|
||||
host="0.0.0.0",
|
||||
port=5543,
|
||||
workers=1, # limit to one process to avoid copying the model
|
||||
# reload=True
|
||||
)
|
191
server-dev.ipynb
191
server-dev.ipynb
@ -13,28 +13,187 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 91,
|
||||
"id": "7513b2a1-0749-44b5-88a9-d91d5b175e8e",
|
||||
"execution_count": 108,
|
||||
"id": "b5fd745b-6681-4e3d-81f2-2c721fde9318",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'http://127.0.0.1:5543/generate_stream'"
|
||||
]
|
||||
},
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"url"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"id": "4a6b2bf7-aa4e-45a3-b6c3-5f0e20b6a0e0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[110], line 16\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msseclient\u001b[39;00m\n\u001b[1;32m 3\u001b[0m obj \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minputs\u001b[39m\u001b[38;5;124m\"\u001b[39m: sequence,\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgeneration_parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m: {\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 13\u001b[0m }\n\u001b[1;32m 14\u001b[0m }\n\u001b[0;32m---> 16\u001b[0m stream_response \u001b[38;5;241m=\u001b[39m \u001b[43mrequests\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpost\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 18\u001b[0m client \u001b[38;5;241m=\u001b[39m sseclient\u001b[38;5;241m.\u001b[39mSSEClient(stream_response)\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# Loop forever (while connection \"open\")\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:115\u001b[0m, in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(url, data\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, json\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 104\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a POST request.\u001b[39;00m\n\u001b[1;32m 105\u001b[0m \n\u001b[1;32m 106\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 112\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpost\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 584\u001b[0m send_kwargs \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 585\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m\"\u001b[39m: timeout,\n\u001b[1;32m 586\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mallow_redirects\u001b[39m\u001b[38;5;124m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 587\u001b[0m }\n\u001b[1;32m 588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 700\u001b[0m start \u001b[38;5;241m=\u001b[39m preferred_clock()\n\u001b[1;32m 702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 706\u001b[0m elapsed \u001b[38;5;241m=\u001b[39m preferred_clock() \u001b[38;5;241m-\u001b[39m start\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 483\u001b[0m timeout \u001b[38;5;241m=\u001b[39m TimeoutSauce(connect\u001b[38;5;241m=\u001b[39mtimeout, read\u001b[38;5;241m=\u001b[39mtimeout)\n\u001b[1;32m 485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 501\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(err, request\u001b[38;5;241m=\u001b[39mrequest)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connectionpool.py:790\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)\u001b[0m\n\u001b[1;32m 787\u001b[0m response_conn \u001b[38;5;241m=\u001b[39m conn \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m release_conn \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;66;03m# Make the request on the HTTPConnection object\u001b[39;00m\n\u001b[0;32m--> 790\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 791\u001b[0m \u001b[43m \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 792\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 793\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 794\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 795\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 796\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 797\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 798\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 799\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresponse_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 800\u001b[0m \u001b[43m \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpreload_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 801\u001b[0m \u001b[43m \u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdecode_content\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 802\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 803\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;66;03m# Everything went great!\u001b[39;00m\n\u001b[1;32m 806\u001b[0m clean_exit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connectionpool.py:536\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# Receive the response from the server\u001b[39;00m\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (BaseSSLError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_raise_timeout(err\u001b[38;5;241m=\u001b[39me, url\u001b[38;5;241m=\u001b[39murl, timeout_value\u001b[38;5;241m=\u001b[39mread_timeout)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/connection.py:454\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 451\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mresponse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m HTTPResponse\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# Get the response from http.client.HTTPConnection\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetresponse\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 457\u001b[0m assert_header_parsing(httplib_response\u001b[38;5;241m.\u001b[39mmsg)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:1377\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1375\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1377\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbegin\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1378\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m:\n\u001b[1;32m 1379\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mclose()\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:320\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;66;03m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m version, status, reason \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_read_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m status \u001b[38;5;241m!=\u001b[39m CONTINUE:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/http/client.py:281\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_read_status\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 281\u001b[0m line \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mreadline\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_MAXLINE\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miso-8859-1\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 282\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(line) \u001b[38;5;241m>\u001b[39m _MAXLINE:\n\u001b[1;32m 283\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LineTooLong(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstatus line\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecv_into\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 705\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_timeout_occurred \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sseclient\n",
|
||||
"\n",
|
||||
"obj = {\n",
|
||||
" \"inputs\": sequence,\n",
|
||||
" \"generation_parameters\": {\n",
|
||||
" \"max_new_tokens\":100,\n",
|
||||
" # \"repetion_penalty\": 2.0,\n",
|
||||
" # \"do_sample\": False,\n",
|
||||
" # \"temperature\": 1.0,\n",
|
||||
" # \"top_k\": 1,\n",
|
||||
" # \"top_p\": 0.9,\n",
|
||||
" # \"seed\": 43,\n",
|
||||
" }\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"stream_response = requests.post(url, json=obj, stream=True)\n",
|
||||
"\n",
|
||||
"client = sseclient.SSEClient(stream_response)\n",
|
||||
"\n",
|
||||
"# Loop forever (while connection \"open\")\n",
|
||||
"for event in client.events():\n",
|
||||
" print (\"got a new event from server\")\n",
|
||||
" pprint.pprint(event.data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 95,
|
||||
"id": "9cac401e-3a69-4f9c-b074-f1ba8e06df49",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Collecting sseclient\n",
|
||||
" Downloading sseclient-0.0.27.tar.gz (7.5 kB)\n",
|
||||
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25hRequirement already satisfied: requests>=2.9 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from sseclient) (2.31.0)\n",
|
||||
"Requirement already satisfied: six in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from sseclient) (1.16.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (3.1.0)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (3.4)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (2.0.3)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /home/robertgshaw/.conda/envs/dscb/lib/python3.9/site-packages (from requests>=2.9->sseclient) (2023.5.7)\n",
|
||||
"Building wheels for collected packages: sseclient\n",
|
||||
" Building wheel for sseclient (setup.py) ... \u001b[?25ldone\n",
|
||||
"\u001b[?25h Created wheel for sseclient: filename=sseclient-0.0.27-py3-none-any.whl size=5565 sha256=56a8fb4039994d2facce58dba0bad7a3af7f5a9569e80b70ec2adab94dc46a35\n",
|
||||
" Stored in directory: /home/robertgshaw/.cache/pip/wheels/73/21/3f/9d20c05c1c9312ab61bc7bf63bb2c189a71e800cb028fa8d2a\n",
|
||||
"Successfully built sseclient\n",
|
||||
"Installing collected packages: sseclient\n",
|
||||
"Successfully installed sseclient-0.0.27\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install sseclient"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 96,
|
||||
"id": "80272ae9-6c07-43a3-920e-4d13a5905685",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "InvalidURL",
|
||||
"evalue": "Failed to parse: <Response [405]>",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/util/url.py:425\u001b[0m, in \u001b[0;36mparse_url\u001b[0;34m(url)\u001b[0m\n\u001b[1;32m 424\u001b[0m auth \u001b[38;5;241m=\u001b[39m auth \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 425\u001b[0m host, port \u001b[38;5;241m=\u001b[39m \u001b[43m_HOST_PORT_RE\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmatch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhost_port\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m() \u001b[38;5;66;03m# type: ignore[union-attr]\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m auth \u001b[38;5;129;01mand\u001b[39;00m normalize_uri:\n",
|
||||
"\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'groups'",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[0;31mLocationParseError\u001b[0m Traceback (most recent call last)",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:434\u001b[0m, in \u001b[0;36mPreparedRequest.prepare_url\u001b[0;34m(self, url, params)\u001b[0m\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 434\u001b[0m scheme, auth, host, port, path, query, fragment \u001b[38;5;241m=\u001b[39m \u001b[43mparse_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m LocationParseError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/urllib3/util/url.py:451\u001b[0m, in \u001b[0;36mparse_url\u001b[0;34m(url)\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mAttributeError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 451\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m LocationParseError(source_url) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# For the sake of backwards compatibility we put empty\u001b[39;00m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;66;03m# string values for path if there are any defined values\u001b[39;00m\n\u001b[1;32m 455\u001b[0m \u001b[38;5;66;03m# beyond the path in the URL.\u001b[39;00m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;66;03m# TODO: Remove this when we break backwards compatibility.\u001b[39;00m\n",
|
||||
"\u001b[0;31mLocationParseError\u001b[0m: Failed to parse: <Response [405]>",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001b[0;31mInvalidURL\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[96], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01msseclient\u001b[39;00m\u001b[38;5;241m,\u001b[39m \u001b[38;5;21;01mpprint\u001b[39;00m\n\u001b[1;32m 3\u001b[0m stream_response \u001b[38;5;241m=\u001b[39m requests\u001b[38;5;241m.\u001b[39mget(url, json\u001b[38;5;241m=\u001b[39mobj, stream\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m----> 4\u001b[0m client \u001b[38;5;241m=\u001b[39m \u001b[43msseclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSSEClient\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream_response\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;66;03m# Loop forever (while connection \"open\")\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m event \u001b[38;5;129;01min\u001b[39;00m client\u001b[38;5;241m.\u001b[39mevents():\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/sseclient.py:48\u001b[0m, in \u001b[0;36mSSEClient.__init__\u001b[0;34m(self, url, last_id, retry, session, chunk_size, **kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# Keep data here as it streams in\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuf \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m---> 48\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connect\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/sseclient.py:56\u001b[0m, in \u001b[0;36mSSEClient._connect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;66;03m# Use session if set. Otherwise fall back to requests module.\u001b[39;00m\n\u001b[1;32m 55\u001b[0m requester \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msession \u001b[38;5;129;01mor\u001b[39;00m requests\n\u001b[0;32m---> 56\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp \u001b[38;5;241m=\u001b[39m \u001b[43mrequester\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequests_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp_iterator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miter_content()\n\u001b[1;32m 58\u001b[0m encoding \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresp\u001b[38;5;241m.\u001b[39mapparent_encoding\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:73\u001b[0m, in \u001b[0;36mget\u001b[0;34m(url, params, **kwargs)\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget\u001b[39m(url, params\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 63\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Sends a GET request.\u001b[39;00m\n\u001b[1;32m 64\u001b[0m \n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m :param url: URL for the new :class:`Request` object.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 70\u001b[0m \u001b[38;5;124;03m :rtype: requests.Response\u001b[39;00m\n\u001b[1;32m 71\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/api.py:59\u001b[0m, in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;66;03m# By using the 'with' statement we are sure the session is closed, thus we\u001b[39;00m\n\u001b[1;32m 56\u001b[0m \u001b[38;5;66;03m# avoid leaving sockets open which can trigger a ResourceWarning in some\u001b[39;00m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;66;03m# cases, and look like a memory leak in others.\u001b[39;00m\n\u001b[1;32m 58\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m sessions\u001b[38;5;241m.\u001b[39mSession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 59\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:575\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;66;03m# Create the Request.\u001b[39;00m\n\u001b[1;32m 563\u001b[0m req \u001b[38;5;241m=\u001b[39m Request(\n\u001b[1;32m 564\u001b[0m method\u001b[38;5;241m=\u001b[39mmethod\u001b[38;5;241m.\u001b[39mupper(),\n\u001b[1;32m 565\u001b[0m url\u001b[38;5;241m=\u001b[39murl,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 573\u001b[0m hooks\u001b[38;5;241m=\u001b[39mhooks,\n\u001b[1;32m 574\u001b[0m )\n\u001b[0;32m--> 575\u001b[0m prep \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreq\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 577\u001b[0m proxies \u001b[38;5;241m=\u001b[39m proxies \u001b[38;5;129;01mor\u001b[39;00m {}\n\u001b[1;32m 579\u001b[0m settings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmerge_environment_settings(\n\u001b[1;32m 580\u001b[0m prep\u001b[38;5;241m.\u001b[39murl, proxies, stream, verify, cert\n\u001b[1;32m 581\u001b[0m )\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/sessions.py:486\u001b[0m, in \u001b[0;36mSession.prepare_request\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 483\u001b[0m auth \u001b[38;5;241m=\u001b[39m get_netrc_auth(request\u001b[38;5;241m.\u001b[39murl)\n\u001b[1;32m 485\u001b[0m p \u001b[38;5;241m=\u001b[39m PreparedRequest()\n\u001b[0;32m--> 486\u001b[0m \u001b[43mp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupper\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfiles\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m \u001b[49m\u001b[43mjson\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 492\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[43m \u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdict_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCaseInsensitiveDict\u001b[49m\n\u001b[1;32m 494\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 495\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 496\u001b[0m \u001b[43m \u001b[49m\u001b[43mauth\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_setting\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauth\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 497\u001b[0m \u001b[43m \u001b[49m\u001b[43mcookies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerged_cookies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 498\u001b[0m \u001b[43m \u001b[49m\u001b[43mhooks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmerge_hooks\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhooks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhooks\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 499\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 500\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m p\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:368\u001b[0m, in \u001b[0;36mPreparedRequest.prepare\u001b[0;34m(self, method, url, headers, files, data, params, auth, cookies, hooks, json)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Prepares the entire request with the given parameters.\"\"\"\u001b[39;00m\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_method(method)\n\u001b[0;32m--> 368\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_url\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_headers(headers)\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_cookies(cookies)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/dscb/lib/python3.9/site-packages/requests/models.py:436\u001b[0m, in \u001b[0;36mPreparedRequest.prepare_url\u001b[0;34m(self, url, params)\u001b[0m\n\u001b[1;32m 434\u001b[0m scheme, auth, host, port, path, query, fragment \u001b[38;5;241m=\u001b[39m parse_url(url)\n\u001b[1;32m 435\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m LocationParseError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 436\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidURL(\u001b[38;5;241m*\u001b[39me\u001b[38;5;241m.\u001b[39margs)\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m scheme:\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m MissingSchema(\n\u001b[1;32m 440\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid URL \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m: No scheme supplied. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 441\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPerhaps you meant https://\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 442\u001b[0m )\n",
|
||||
"\u001b[0;31mInvalidURL\u001b[0m: Failed to parse: <Response [405]>"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sseclient, pprint\n",
|
||||
"\n",
|
||||
"stream_response = requests.get(url, json=obj, stream=True)\n",
|
||||
"client = sseclient.SSEClient(stream_response)\n",
|
||||
"\n",
|
||||
"# Loop forever (while connection \"open\")\n",
|
||||
"for event in client.events():\n",
|
||||
" print (\"got a new event from server\")\n",
|
||||
" pprint.pprint(event.data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 93,
|
||||
"id": "7513b2a1-0749-44b5-88a9-d91d5b175e8e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Exception in thread Thread-179:\n",
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/home/robertgshaw/.conda/envs/dscb/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n",
|
||||
" self.run()\n",
|
||||
" File \"/home/robertgshaw/.conda/envs/dscb/lib/python3.9/threading.py\", line 917, in run\n",
|
||||
" self._target(*self._args, **self._kwargs)\n",
|
||||
" File \"/tmp/ipykernel_9240/1133841396.py\", line 26, in request_task\n",
|
||||
"TypeError: 'NoneType' object is not subscriptable\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"55\n",
|
||||
"{'response_text': '\\n if n == 0:\\n return 0\\n elif n == 1:\\n return 1\\n else:\\n return fib(n-1) + fib(n-2)\\n\\n# Driver function to test above function\\nn', 'finish_reason': 1}\n",
|
||||
"Finish the following function for computing a fibonacci sequence: \n",
|
||||
"\n",
|
||||
"def fib(n):\n",
|
||||
" if n == 0:\n",
|
||||
" return 0\n",
|
||||
" elif n == 1:\n",
|
||||
" return 1\n",
|
||||
" else:\n",
|
||||
" return fib(n-1) + fib(n-2)\n",
|
||||
"\n",
|
||||
"# Driver function to test above function\n",
|
||||
"n\n"
|
||||
"None\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -43,7 +202,7 @@
|
||||
"from threading import Thread\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"url = \"http://127.0.0.1:5543/generate\"\n",
|
||||
"url = \"http://127.0.0.1:5543/generate_stream\"\n",
|
||||
"sequence = \"Finish the following function for computing a fibonacci sequence: \\n\\ndef fib(n):\"\n",
|
||||
"# sequence = \"def fib(n):\"\n",
|
||||
"\n",
|
||||
|
Loading…
Reference in New Issue
Block a user