diff --git a/Dockerfile_amd b/Dockerfile_amd
index f6dffac5..81050343 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -114,6 +114,14 @@ ARG BUILD_CAFFE2="0" \
 
 RUN cd pytorch && python tools/amd_build/build_amd.py && python setup.py install
 
+# Download a patched HIP runtime (authenticated via GITHUB_TOKEN) and preload it in place of the system libamdhip64.
+ARG GITHUB_TOKEN
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends wget && \
+    rm -rf /var/lib/apt/lists/* && \
+    wget --header "Authorization: token ${GITHUB_TOKEN}" -O /libamdhip64.so.6.2.41130 https://raw.githubusercontent.com/fxmarty/patched_hipruntime/main/libamdhip64.so.6.2.41130
+
+ENV LD_PRELOAD="/libamdhip64.so.6.2.41130"
+
 # Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
 # Disabled for now as it is currently not stable with ROCm 6.1.
 # ENV HIP_FORCE_DEV_KERNARG=1
diff --git a/Dockerfile_amd_nightly_no_patch b/Dockerfile_amd_nightly_no_patch
deleted file mode 100644
index 9d99fc16..00000000
--- a/Dockerfile_amd_nightly_no_patch
+++ /dev/null
@@ -1,199 +0,0 @@
-# Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
-WORKDIR /usr/src
-
-ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
-
-FROM chef as planner
-COPY Cargo.toml Cargo.toml
-COPY rust-toolchain.toml rust-toolchain.toml
-COPY proto proto
-COPY benchmark benchmark
-COPY router router
-COPY launcher launcher
-RUN cargo chef prepare --recipe-path recipe.json
-
-FROM chef AS builder
-
-ARG GIT_SHA
-ARG DOCKER_LABEL
-
-RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
-    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
-    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
-    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
-    rm -f $PROTOC_ZIP
-
-COPY --from=planner /usr/src/recipe.json recipe.json
-RUN cargo chef cook --release --recipe-path recipe.json
-
-COPY Cargo.toml Cargo.toml
-COPY rust-toolchain.toml rust-toolchain.toml
-COPY proto proto
-COPY benchmark benchmark
-COPY router router
-COPY launcher launcher
-RUN cargo build --release
-
-# Text Generation Inference base image for RoCm
-FROM rocm/dev-ubuntu-22.04:6.1 as base
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    build-essential \
-    ca-certificates \
-    ccache \
-    curl \
-    git \
-    make \
-    libssl-dev \
-    g++ \
-    # Needed to build VLLM & flash.
-    rocthrust-dev \
-    hipsparse-dev \
-    hipblas-dev \
-    hipblaslt-dev \
-    rocblas-dev \
-    hiprand-dev \
-    rocrand-dev \
-    miopen-hip-dev \
-    hipfft-dev \
-    hipcub-dev \
-    hipsolver-dev \
-    rccl-dev \
-    cmake \
-    python3-dev && \
-    rm -rf /var/lib/apt/lists/*
-
-# Keep in sync with `server/pyproject.toml
-ARG MAMBA_VERSION=23.1.0-1
-ARG PYTORCH_VERSION='2.3.0'
-ARG ROCM_VERSION='6.0.2'
-ARG PYTHON_VERSION='3.10.10'
-# Automatically set by buildx
-ARG TARGETPLATFORM
-ENV PATH /opt/conda/bin:$PATH
-
-# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
-# Install mamba
-# translating Docker's TARGETPLATFORM into mamba arches
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
-    *) MAMBA_ARCH=x86_64 ;; \
-    esac && \
-    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
-RUN chmod +x ~/mambaforge.sh && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    mamba init && \
-    rm ~/mambaforge.sh
-
-# Install flash-attention, torch dependencies
-RUN pip install numpy einops ninja --no-cache-dir
-
-RUN conda install intel::mkl-static intel::mkl-include
-
-RUN pip install --pre torch==2.4.0.dev20240506 --index-url https://download.pytorch.org/whl/nightly/rocm6.1
-
-RUN pip uninstall -y triton && \
-    git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
-    cd triton/python && \
-    pip install .
-
-# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
-# Disabled for now as it is currently not stable with ROCm 6.1.
-# ENV HIP_FORCE_DEV_KERNARG=1
-
-FROM base AS kernel-builder
-
-# # Build vllm kernels
-FROM kernel-builder AS vllm-builder
-WORKDIR /usr/src
-
-COPY server/Makefile-vllm Makefile
-
-# Build specific version of vllm
-RUN make build-vllm-rocm
-
-# Build Flash Attention v2 kernels
-FROM kernel-builder AS flash-att-v2-builder
-WORKDIR /usr/src
-
-COPY server/Makefile-flash-att-v2 Makefile
-
-# Build specific version of flash attention v2
-RUN make build-flash-attention-v2-rocm
-
-# Build Transformers CUDA kernels (gpt-neox and bloom)
-FROM kernel-builder as custom-kernels-builder
-WORKDIR /usr/src
-COPY server/custom_kernels/ .
-RUN python setup.py build
-
-# Build exllama kernels
-FROM kernel-builder as exllama-kernels-builder
-WORKDIR /usr/src
-COPY server/exllama_kernels/ .
-
-RUN python setup.py build
-
-# Build exllama v2 kernels
-FROM kernel-builder as exllamav2-kernels-builder
-WORKDIR /usr/src
-COPY server/exllamav2_kernels/ .
-
-RUN python setup.py build
-
-FROM base as base-copy
-
-# Text Generation Inference base env
-ENV HUGGINGFACE_HUB_CACHE=/data \
-    HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=80
-
-# Copy builds artifacts from vllm builder
-COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from flash attention v2 builder
-COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from custom kernels builder
-COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from exllama kernels builder
-COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from exllamav2 kernels builder
-COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Install server
-COPY proto proto
-COPY server server
-COPY server/Makefile server/Makefile
-
-# Install benchmarker
-COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
-# Install router
-COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
-# Install launcher
-COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
-
-RUN cd server && \
-    make gen-server && \
-    pip install -r requirements_rocm.txt
-    #pip install ".[accelerate, peft, outlines]" --no-cache-dir
-
-# AWS Sagemaker compatible image
-FROM base as sagemaker
-
-COPY sagemaker-entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
-
-# Final image
-FROM base-copy
-
-# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
-# RUN chmod +x /tgi-entrypoint.sh
-
-# ENTRYPOINT ["/tgi-entrypoint.sh"]
-# CMD ["--json-output"]
diff --git a/Dockerfile_amd_rocm60 b/Dockerfile_amd_rocm60
deleted file mode 100644
index e3f38fbd..00000000
--- a/Dockerfile_amd_rocm60
+++ /dev/null
@@ -1,241 +0,0 @@
-# Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
-WORKDIR /usr/src
-
-ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
-
-FROM chef as planner
-COPY Cargo.toml Cargo.toml
-COPY rust-toolchain.toml rust-toolchain.toml
-COPY proto proto
-COPY benchmark benchmark
-COPY router router
-COPY launcher launcher
-RUN cargo chef prepare --recipe-path recipe.json
-
-FROM chef AS builder
-
-ARG GIT_SHA
-ARG DOCKER_LABEL
-
-RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
-    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
-    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
-    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
-    rm -f $PROTOC_ZIP
-
-COPY --from=planner /usr/src/recipe.json recipe.json
-RUN cargo chef cook --release --recipe-path recipe.json
-
-COPY Cargo.toml Cargo.toml
-COPY rust-toolchain.toml rust-toolchain.toml
-COPY proto proto
-COPY benchmark benchmark
-COPY router router
-COPY launcher launcher
-RUN cargo build --release
-
-# Text Generation Inference base image for RoCm
-FROM rocm/dev-ubuntu-22.04:6.0.2 as base
-
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
-    build-essential \
-    ca-certificates \
-    ccache \
-    curl \
-    git \
-    make \
-    libssl-dev \
-    g++ \
-    # Needed to build VLLM & flash.
-    rocthrust-dev \
-    hipsparse-dev \
-    hipblas-dev \
-    hipblaslt-dev \
-    rocblas-dev \
-    hiprand-dev \
-    rocrand-dev \
-    miopen-hip-dev \
-    hipfft-dev \
-    hipcub-dev \
-    hipsolver-dev \
-    rccl-dev \
-    wget \
-    python3-dev && \
-    rm -rf /var/lib/apt/lists/*
-
-# Keep in sync with `server/pyproject.toml
-ARG MAMBA_VERSION=23.1.0-1
-ARG PYTORCH_VERSION='2.3.0'
-ARG ROCM_VERSION='6.0.2'
-ARG PYTHON_VERSION='3.10.10'
-# Automatically set by buildx
-ARG TARGETPLATFORM
-ENV PATH /opt/conda/bin:$PATH
-
-# TGI seem to require libssl.so.1.1 instead of libssl.so.3 so we can't use ubuntu 22.04. Ubuntu 20.04 has python==3.8, and TGI requires python>=3.9, hence the need for miniconda.
-# Install mamba
-# translating Docker's TARGETPLATFORM into mamba arches
-RUN case ${TARGETPLATFORM} in \
-    "linux/arm64") MAMBA_ARCH=aarch64 ;; \
-    *) MAMBA_ARCH=x86_64 ;; \
-    esac && \
-    curl -fsSL -v -o ~/mambaforge.sh -O "https://github.com/conda-forge/miniforge/releases/download/${MAMBA_VERSION}/Mambaforge-${MAMBA_VERSION}-Linux-${MAMBA_ARCH}.sh"
-RUN chmod +x ~/mambaforge.sh && \
-    bash ~/mambaforge.sh -b -p /opt/conda && \
-    mamba init && \
-    rm ~/mambaforge.sh
-
-# Install flash-attention, torch dependencies
-RUN pip install numpy einops ninja --no-cache-dir
-
-ARG PYTORCH_ROCM_ARCH="gfx90a;gfx942"
-
-# Install cmake >= 3.25.2
-RUN wget https://cmake.org/files/v3.29/cmake-3.29.3.tar.gz && \
-    tar -xf cmake-3.29.3.tar.gz && \
-    cd cmake-3.29.3 && \
-    ./configure && \
-    make && \
-    make install
-
-RUN conda install intel::mkl-static=2024.1.0 intel::mkl-include=2024.1.0
-
-# Build HipblasLt
-RUN apt-get purge -y hipblaslt hipblaslt-dev && \
-    apt-get update && \
-    apt-get install -y --no-install-recommends libmsgpack-dev && \
-    rm -rf /var/lib/apt/lists/* && \
-    mkdir -p libs && \
-    cd libs && \
-    git clone https://github.com/ROCm/hipBLASLt && \
-    cd hipBLASLt && \
-    git checkout 560c7e8f73788af47c2135425f7b6e4fa965b311 && \
-    pip install -r tensilelite/requirements.txt --no-cache-dir && \
-    SCCACHE_IDLE_TIMEOUT=1800 ./install.sh -i --architecture ${PYTORCH_ROCM_ARCH} && \
-    cd .. && rm -rf hipBLASLt && \
-    sed -i 's/, hipblaslt-dev \(.*\), hipcub-dev/, hipcub-dev/g' /var/lib/dpkg/status && \
-    sed -i 's/, hipblaslt \(.*\), hipfft/, hipfft/g' /var/lib/dpkg/status
-
-RUN pip uninstall -y triton && \
-    git clone --depth 1 --single-branch https://github.com/ROCm/triton.git && \
-    cd triton/python && \
-    pip install .
-
-RUN git clone --depth 1 --recursive --single-branch --branch 2.3-patched https://github.com/fxmarty/pytorch.git pytorch && cd pytorch && pip install -r requirements.txt --no-cache-dir
-
-ARG _GLIBCXX_USE_CXX11_ABI="1"
-ARG CMAKE_PREFIX_PATH="/opt/conda"
-ARG BUILD_CAFFE2="0" \
-    BUILD_CAFFE2_OPS="0" \
-    USE_CUDA="0" \
-    USE_ROCM="1" \
-    BUILD_TEST="0" \
-    USE_FBGEMM="0" \
-    USE_NNPACK="0" \
-    USE_QNNPACK="0" \
-    USE_XNNPACK="0" \
-    USE_FLASH_ATTENTION="1" \
-    USE_MEM_EFF_ATTENTION="0"
-
-RUN cd pytorch && python tools/amd_build/build_amd.py && python setup.py install
-
-# Set as recommended: https://github.com/ROCm/triton/wiki/A-script-to-set-program-execution-environment-in-ROCm
-# Disabled for now as it is currently not stable with ROCm 6.1.
-# ENV HIP_FORCE_DEV_KERNARG=1
-
-FROM base AS kernel-builder
-
-# # Build vllm kernels
-FROM kernel-builder AS vllm-builder
-WORKDIR /usr/src
-
-COPY server/Makefile-vllm Makefile
-
-# Build specific version of vllm
-RUN make build-vllm-rocm
-
-# Build Flash Attention v2 kernels
-FROM kernel-builder AS flash-att-v2-builder
-WORKDIR /usr/src
-
-COPY server/Makefile-flash-att-v2 Makefile
-
-# Build specific version of flash attention v2
-RUN make build-flash-attention-v2-rocm
-
-# Build Transformers CUDA kernels (gpt-neox and bloom)
-FROM kernel-builder as custom-kernels-builder
-WORKDIR /usr/src
-COPY server/custom_kernels/ .
-RUN python setup.py build
-
-# Build exllama kernels
-FROM kernel-builder as exllama-kernels-builder
-WORKDIR /usr/src
-COPY server/exllama_kernels/ .
-
-RUN python setup.py build
-
-# Build exllama v2 kernels
-FROM kernel-builder as exllamav2-kernels-builder
-WORKDIR /usr/src
-COPY server/exllamav2_kernels/ .
-
-RUN python setup.py build
-
-FROM base as base-copy
-
-# Text Generation Inference base env
-ENV HUGGINGFACE_HUB_CACHE=/data \
-    HF_HUB_ENABLE_HF_TRANSFER=1 \
-    PORT=80
-
-# Copy builds artifacts from vllm builder
-COPY --from=vllm-builder /usr/src/vllm/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from flash attention v2 builder
-COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from custom kernels builder
-COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from exllama kernels builder
-COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Copy build artifacts from exllamav2 kernels builder
-COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
-
-# Install server
-COPY proto proto
-COPY server server
-COPY server/Makefile server/Makefile
-
-# Install benchmarker
-COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
-# Install router
-COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bin/text-generation-router
-# Install launcher
-COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
-
-RUN cd server && \
-    make gen-server && \
-    pip install -r requirements_rocm.txt
-    #pip install ".[accelerate, peft, outlines]" --no-cache-dir
-
-# AWS Sagemaker compatible image
-FROM base as sagemaker
-
-COPY sagemaker-entrypoint.sh entrypoint.sh
-RUN chmod +x entrypoint.sh
-
-ENTRYPOINT ["./entrypoint.sh"]
-
-# Final image
-FROM base-copy
-
-# COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
-# RUN chmod +x /tgi-entrypoint.sh
-
-# ENTRYPOINT ["/tgi-entrypoint.sh"]
-# CMD ["--json-output"]
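
Usage note for the Dockerfile_amd change above: the patched libamdhip64 runtime is fetched with an Authorization header, which suggests it lives in a private repository, so GITHUB_TOKEN must be supplied as a build argument. A minimal sketch of a build invocation (the image tag and token placeholder are arbitrary, not part of this patch):

    docker build -f Dockerfile_amd --build-arg GITHUB_TOKEN=<token> -t tgi-rocm .

Since values passed via --build-arg can end up recorded in the image history, a short-lived, minimally scoped token is advisable. At runtime, LD_PRELOAD makes the dynamic loader resolve symbols from /libamdhip64.so.6.2.41130 ahead of the ROCm-provided libamdhip64, which applies the patched HIP runtime to PyTorch and the compiled kernels without rebuilding them.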