diff --git a/backends/trtllm/Dockerfile b/backends/trtllm/Dockerfile
index 2afd4c57..ee7f5ab0 100644
--- a/backends/trtllm/Dockerfile
+++ b/backends/trtllm/Dockerfile
@@ -53,7 +53,7 @@ RUN mkdir /usr/local/tgi && mkdir /usr/local/tgi/include && mkdir /usr/local/tgi
 FROM nvcr.io/nvidia/pytorch:24.05-py3
 WORKDIR /usr/local/tgi/bin
 
-ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:$LD_LIBRARY_PATH"
+ENV LD_LIBRARY_PATH="/usr/local/tgi/lib:/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH"
 
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
     ln -s /usr/local/cuda/lib64/stubs/libnvidia-ml.so /usr/local/cuda/lib64/stubs/libnvidia-ml.so.1
@@ -61,3 +61,6 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
 COPY --from=cuda-builder /usr/local/tensorrt /usr/local/tensorrt
 COPY --from=cuda-builder /usr/local/tgi /usr/local/tgi
 COPY --from=cuda-builder /usr/src/text-generation-inference/target/release/text-generation-backends-trtllm /usr/local/tgi/bin/text-generation-launcher
+
+ENTRYPOINT ["text-generation-launcher"]
+CMD ["--executor-worker", "/usr/local/tgi/bin/executorWorker"]
diff --git a/backends/trtllm/README.md b/backends/trtllm/README.md
new file mode 100644
index 00000000..e5f4719f
--- /dev/null
+++ b/backends/trtllm/README.md
@@ -0,0 +1,6 @@
+```mermaid
+sequenceDiagram
+    TensorRtLlmBackend -->> TensorRtLlmBackendImpl: New thread which instantiates actual backend impl
+    TensorRtLlmBackendImpl -->> TensorRtLlmBackendImpl.Receiver: Awaits incoming requests sent through the queue
+
+```
\ No newline at end of file