diff --git a/Makefile b/Makefile
index ade0cdd5..3defd886 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@ server-dev:
 	cd server && make run-dev
 
 router-dev:
-	cd router && cargo run
+	cd router && cargo run -- --port 8080
 
 integration-tests: install-router install-launcher
 	cargo test
@@ -22,16 +22,16 @@ python-tests:
 	cd server && HF_HUB_ENABLE_HF_TRANSFER=1 pytest tests
 
 run-bloom-560m:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --port 8080
 
 run-bloom-560m-quantize:
-	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize
+	text-generation-launcher --model-id bigscience/bloom-560m --num-shard 2 --quantize --port 8080
 
 download-bloom:
 	HF_HUB_ENABLE_HF_TRANSFER=1 text-generation-server download-weights bigscience/bloom
 
 run-bloom:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --port 8080
 
 run-bloom-quantize:
-	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize
\ No newline at end of file
+	text-generation-launcher --model-id bigscience/bloom --num-shard 8 --quantize --port 8080
\ No newline at end of file
diff --git a/README.md b/README.md
index ae185506..5cd26a0e 100644
--- a/README.md
+++ b/README.md
@@ -89,40 +89,35 @@ You can then query the model using either the `/generate` or `/generate_stream` routes:
 
 ```shell
 curl 127.0.0.1:8080/generate \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 ```shell
 curl 127.0.0.1:8080/generate_stream \
     -X POST \
-    -d '{"inputs":"Testing API","parameters":{"max_new_tokens":9}}' \
+    -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17}}' \
     -H 'Content-Type: application/json'
 ```
 
 or from Python:
 
-```python
-import requests
-
-result = requests.post("http://127.0.0.1:8080/generate", json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-print(result.json())
-```
-
 ```shell
-pip install sseclient-py
+pip install text-generation
 ```
 
-````python
-import sseclient
-import requests
-
-r = requests.post("http://127.0.0.1:8080/generate_stream", stream=True, json={"inputs":"Testing API","parameters":{"max_new_tokens":9}})
-sse_client = sseclient.SSEClient(r)
-
-for i, event in enumerate(sse_client.events()):
-    print(i, event.data)
-````
+```python
+from text_generation import Client
+
+client = Client("http://127.0.0.1:8080")
+print(client.generate("What is Deep Learning?", max_new_tokens=17).generated_text)
+
+text = ""
+for response in client.generate_stream("What is Deep Learning?", max_new_tokens=17):
+    if not response.token.special:
+        text += response.token.text
+print(text)
+```
 
 **Note:** To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html).