diff --git a/backends/neuron/Makefile b/backends/neuron/Makefile
index ec65ff40..06674971 100644
--- a/backends/neuron/Makefile
+++ b/backends/neuron/Makefile
@@ -33,7 +33,3 @@ install_server:
 test_server: install_server
 	python -m pip install -r ${mkfile_dir}/tests/requirements.txt
 	python -m pytest -sv ${mkfile_dir}/tests/server
-
-test_integration: image
-	python -m pip install -r ${mkfile_dir}/tests/requirements.txt
-	python -m pytest -sv ${mkfile_dir}/tests/integration
diff --git a/backends/neuron/tests/conftest.py b/backends/neuron/tests/conftest.py
index f0fc72ab..1dd20c8c 100644
--- a/backends/neuron/tests/conftest.py
+++ b/backends/neuron/tests/conftest.py
@@ -1 +1 @@
-pytest_plugins = ["fixtures.service", "fixtures.model"]
+pytest_plugins = ["fixtures.model"]
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 2d3ae8a2..529c614f 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -1,3 +1,4 @@
+pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.model"]
 # ruff: noqa: E402
 from _pytest.fixtures import SubRequest
 import requests
diff --git a/integration-tests/fixtures/neuron/model.py b/integration-tests/fixtures/neuron/model.py
new file mode 100644
index 00000000..6fa63ce8
--- /dev/null
+++ b/integration-tests/fixtures/neuron/model.py
@@ -0,0 +1,129 @@
+import copy
+import logging
+import subprocess
+import sys
+from tempfile import TemporaryDirectory
+
+import huggingface_hub
+import pytest
+from transformers import AutoTokenizer
+
+from optimum.neuron import NeuronModelForCausalLM
+from optimum.neuron.utils import synchronize_hub_cache
+from optimum.neuron.version import __sdk_version__ as sdk_version
+from optimum.neuron.version import __version__ as version
+
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
+    stream=sys.stdout,
+)
+logger = logging.getLogger(__file__)
+
+OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"
+
+# All model configurations below will be added to the neuron_model_config fixture
+MODEL_CONFIGURATIONS = {
+    "gpt2": {
+        "model_id": "gpt2",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 1024, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "llama": {
+        "model_id": "NousResearch/Hermes-2-Theta-Llama-3-8B",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 2048, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "mistral": {
+        "model_id": "optimum/mistral-1.1b-testing",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
+    },
+    "qwen2": {
+        "model_id": "Qwen/Qwen2.5-0.5B",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "fp16"},
+    },
+    "granite": {
+        "model_id": "ibm-granite/granite-3.1-2b-instruct",
+        "export_kwargs": {"batch_size": 4, "sequence_length": 4096, "num_cores": 2, "auto_cast_type": "bf16"},
+    },
+}
+
+
+def get_hub_neuron_model_id(config_name: str):
+    return f"optimum-internal-testing/neuron-testing-{version}-{sdk_version}-{config_name}"
+
+
+def export_model(model_id, export_kwargs, neuron_model_path):
+    export_command = ["optimum-cli", "export", "neuron", "-m", model_id, "--task", "text-generation"]
+    for kwarg, value in export_kwargs.items():
+        export_command.append(f"--{kwarg}")
+        export_command.append(str(value))
+    export_command.append(neuron_model_path)
+    logger.info(f"Exporting {model_id} with {export_kwargs}")
+    try:
+        subprocess.run(export_command, check=True)
+    except subprocess.CalledProcessError as e:
+        raise ValueError(f"Failed to export model: {e}")
+
+
+@pytest.fixture(scope="session", params=MODEL_CONFIGURATIONS.keys())
+def neuron_model_config(request):
+    """Expose a pre-trained neuron model.
+
+    The fixture first makes sure the following model artifacts are present on the hub:
+    - exported neuron model under optimum-internal-testing/neuron-testing-<version>-<name>,
+    - cached artifacts under optimum-internal-testing/neuron-testing-cache.
+    If not, it will export the model and push it to the hub.
+
+    It then fetches the model locally and returns a dictionary containing:
+    - a configuration name,
+    - the original model id,
+    - the export parameters,
+    - the neuron model id,
+    - the neuron model local path.
+
+    For each exposed model, the local directory is maintained for the duration of the
+    test session and cleaned up afterwards.
+    The hub model artifacts are never cleaned up and persist across sessions.
+    They must be cleaned up manually when the optimum-neuron version changes.
+
+    """
+    config_name = request.param
+    model_config = copy.deepcopy(MODEL_CONFIGURATIONS[request.param])
+    model_id = model_config["model_id"]
+    export_kwargs = model_config["export_kwargs"]
+    neuron_model_id = get_hub_neuron_model_id(config_name)
+    with TemporaryDirectory() as neuron_model_path:
+        hub = huggingface_hub.HfApi()
+        if hub.repo_exists(neuron_model_id):
+            logger.info(f"Fetching {neuron_model_id} from the HuggingFace hub")
+            hub.snapshot_download(neuron_model_id, local_dir=neuron_model_path)
+        else:
+            export_model(model_id, export_kwargs, neuron_model_path)
+            tokenizer = AutoTokenizer.from_pretrained(model_id)
+            tokenizer.save_pretrained(neuron_model_path)
+            del tokenizer
+            # Create the test model on the hub
+            hub.create_repo(neuron_model_id, private=True)
+            hub.upload_folder(
+                folder_path=neuron_model_path,
+                repo_id=neuron_model_id,
+                ignore_patterns=[NeuronModelForCausalLM.CHECKPOINT_DIR + "/*"],
+            )
+            # Make sure it is cached
+            synchronize_hub_cache(cache_repo_id=OPTIMUM_CACHE_REPO_ID)
+        # Add dynamic parameters to the model configuration
+        model_config["neuron_model_path"] = neuron_model_path
+        model_config["neuron_model_id"] = neuron_model_id
+        # Also add model configuration name to allow tests to adapt their expectations
+        model_config["name"] = config_name
+        # Yield instead of returning to keep a reference to the temporary directory.
+        # It will go out of scope and be released only once all tests needing the fixture
+        # have been completed.
+        logger.info(f"{config_name} ready for testing ...")
+        yield model_config
+    logger.info(f"Done with {config_name}")
+
+
+@pytest.fixture(scope="module")
+def neuron_model_path(neuron_model_config):
+    yield neuron_model_config["neuron_model_path"]
diff --git a/backends/neuron/tests/fixtures/service.py b/integration-tests/fixtures/neuron/service.py
similarity index 91%
rename from backends/neuron/tests/fixtures/service.py
rename to integration-tests/fixtures/neuron/service.py
index 85b0adc5..927a35af 100644
--- a/backends/neuron/tests/fixtures/service.py
+++ b/integration-tests/fixtures/neuron/service.py
@@ -18,9 +18,20 @@ from huggingface_hub import AsyncInferenceClient, TextGenerationOutput
 
 OPTIMUM_CACHE_REPO_ID = "optimum-internal-testing/neuron-testing-cache"
-DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "text-generation-inference:latest-neuron")
 HF_TOKEN = huggingface_hub.get_token()
 
+
+def get_tgi_docker_image():
+    docker_image = os.getenv("DOCKER_IMAGE", None)
+    if docker_image is None:
+        client = docker.from_env()
+        images = client.images.list(filters={"reference": "text-generation-inference"})
+        if not images:
+            raise ValueError("No text-generation-inference image found on this host to run tests.")
+        docker_image = images[0].tags[0]
+    return docker_image
+
+
 logging.basicConfig(
     level=logging.INFO,
     format="[%(asctime)s] %(levelname)s [%(filename)s.%(funcName)s:%(lineno)d] %(message)s",
@@ -83,7 +94,7 @@ def event_loop():
 
 
 @pytest.fixture(scope="module")
-def launcher(event_loop):
+def neuron_launcher(event_loop):
     """Utility fixture to expose a TGI service.
 
     The fixture uses a single event loop for each module, but it can create multiple
@@ -130,15 +141,16 @@ def launcher(event_loop):
             if var in os.environ:
                 env[var] = os.environ[var]
 
+        base_image = get_tgi_docker_image()
         if os.path.isdir(model_name_or_path):
            # Create a sub-image containing the model to workaround docker dind issues preventing
            # to share a volume from the container running tests
-            docker_tag = f"{container_name}-img"
+            test_image = f"{container_name}-img"
             logger.info(
                 "Building image on the flight derivated from %s, tagged with %s",
-                DOCKER_IMAGE,
-                docker_tag,
+                base_image,
+                test_image,
             )
             with tempfile.TemporaryDirectory() as context_dir:
                 # Copy model directory to build context
@@ -147,17 +159,17 @@ def launcher(event_loop):
                 # Create Dockerfile
                 container_model_id = f"/data/{model_name_or_path}"
                 docker_content = f"""
-                FROM {DOCKER_IMAGE}
+                FROM {base_image}
                 COPY model {container_model_id}
                 """
                 with open(os.path.join(context_dir, "Dockerfile"), "wb") as f:
                     f.write(docker_content.encode("utf-8"))
                     f.flush()
-                image, logs = client.images.build(path=context_dir, dockerfile=f.name, tag=docker_tag)
+                image, logs = client.images.build(path=context_dir, dockerfile=f.name, tag=test_image)
                 logger.info("Successfully built image %s", image.id)
                 logger.debug("Build logs %s", logs)
         else:
-            docker_tag = DOCKER_IMAGE
+            test_image = base_image
             image = None
             container_model_id = model_name_or_path
@@ -167,7 +179,7 @@ def launcher(event_loop):
             args.append("--trust-remote-code")
 
         container = client.containers.run(
-            docker_tag,
+            test_image,
             command=args,
             name=container_name,
             environment=env,
@@ -210,7 +222,7 @@ def launcher(event_loop):
 
 
 @pytest.fixture(scope="module")
-def generate_load():
+def neuron_generate_load():
     """A utility fixture to launch multiple asynchronous TGI requests in parallel
 
     Args:
diff --git a/backends/neuron/tests/integration/test_generate.py b/integration-tests/neuron/integration/test_generate.py
similarity index 93%
rename from backends/neuron/tests/integration/test_generate.py
rename to integration-tests/neuron/integration/test_generate.py
index db716be5..c6bdcccf 100644
--- a/backends/neuron/tests/integration/test_generate.py
+++ b/integration-tests/neuron/integration/test_generate.py
@@ -3,10 +3,10 @@ import pytest
 
 
 @pytest.fixture
-async def tgi_service(launcher, neuron_model_config):
+async def tgi_service(neuron_launcher, neuron_model_config):
     model_name_or_path = neuron_model_config["neuron_model_path"]
     service_name = neuron_model_config["name"]
-    with launcher(service_name, model_name_or_path) as tgi_service:
+    with neuron_launcher(service_name, model_name_or_path) as tgi_service:
         await tgi_service.health(600)
         yield tgi_service
 
@@ -71,9 +71,9 @@ async def test_model_single_request(tgi_service):
 
 
 @pytest.mark.asyncio
-async def test_model_multiple_requests(tgi_service, generate_load):
+async def test_model_multiple_requests(tgi_service, neuron_generate_load):
     num_requests = 4
-    responses = await generate_load(
+    responses = await neuron_generate_load(
         tgi_service.client,
         "What is Deep Learning?",
         max_new_tokens=17,
diff --git a/backends/neuron/tests/integration/test_implicit_env.py b/integration-tests/neuron/integration/test_implicit_env.py
similarity index 94%
rename from backends/neuron/tests/integration/test_implicit_env.py
rename to integration-tests/neuron/integration/test_implicit_env.py
index fa88ab67..df29b910 100644
--- a/backends/neuron/tests/integration/test_implicit_env.py
+++ b/integration-tests/neuron/integration/test_implicit_env.py
@@ -5,7 +5,7 @@ from huggingface_hub.errors import ValidationError
 
 
 @pytest.fixture(scope="module", params=["hub-neuron", "hub", "local-neuron"])
-async def tgi_service(request, launcher, neuron_model_config):
+async def tgi_service(request, neuron_launcher, neuron_model_config):
     """Expose a TGI service corresponding to a model configuration
 
     For each model configuration, the service will be started using the following
@@ -31,7 +31,7 @@ async def tgi_service(request, launcher, neuron_model_config):
     else:
         model_name_or_path = neuron_model_config["neuron_model_path"]
     service_name = neuron_model_config["name"]
-    with launcher(service_name, model_name_or_path) as tgi_service:
+    with neuron_launcher(service_name, model_name_or_path) as tgi_service:
         await tgi_service.health(600)
         yield tgi_service
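Note: with the fixtures registered through `pytest_plugins` in `integration-tests/conftest.py`, new neuron tests under `integration-tests/neuron/` consume the renamed `neuron_launcher`, `neuron_generate_load` and `neuron_model_config` fixtures directly, and the removed `test_integration` Makefile target is no longer needed. The sketch below is a minimal illustration of that pattern, based only on the fixture usage visible in `test_generate.py` above; the prompt string, token count and test name are placeholders, not values from this PR.

```python
import pytest


@pytest.fixture
async def tgi_service(neuron_launcher, neuron_model_config):
    # Start a TGI container for the exported neuron model and wait until it is healthy.
    with neuron_launcher(neuron_model_config["name"], neuron_model_config["neuron_model_path"]) as service:
        await service.health(600)
        yield service


@pytest.mark.asyncio
async def test_single_greedy_request(tgi_service):
    # Illustrative greedy request through the service's AsyncInferenceClient wrapper.
    response = await tgi_service.client.text_generation(
        "What is Deep Learning?", max_new_tokens=17, details=True, decoder_input_details=True
    )
    assert response.details.generated_tokens == 17
```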