from tgi import TGI
from huggingface_hub import InferenceClient
import time

# Launch the TGI server with the PaliGemma model and point a client at the local endpoint
llm = TGI(model_id="google/paligemma-3b-mix-224")
client = InferenceClient("http://localhost:3000")

# Poll until the server answers a test request
while True:
    print("Waiting for the model to be ready...")
    try:
        time.sleep(5)
        generated = client.text_generation("What is Deep Learning?")
        break
    except Exception as e:
        print(e)

print("Model is ready!")
time.sleep(2)

# do a couple of inference requests
print("Generating text...")
generated = client.text_generation("Where is the capital of France?")
print(generated)
time.sleep(2)

generated = client.text_generation(
    "What can you tell me about the history of the United States?"
)
print(generated)
time.sleep(2)

generated = client.text_generation("What are the main characteristics of a cat?")
print(generated)

# Shut down the TGI server
llm.close()