mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
* feat(fp8): add support for fbgemm * allow loading fp8 weights directly * update outlines * fix makefile * build fbgemm * avoid circular import and fix dockerfile * add default dtype * refactored weights loader * fix auto conversion * fix quantization config parsing * force new nccl on install * missing get_weights implementation * increase timeout
12 lines
510 B
Bash
Executable File
#!/bin/bash
set -euo pipefail

# This script is required to patch torch < 2.4
# It adds the 90a cuda target (H100)
# This target is required to build FBGEMM kernels

# Locate torch's bundled FindCUDA helper: take the torch package directory
# (derived from torch.__file__) and append the known relative path to
# select_compute_arch.cmake.
torch_cuda_arch=$(python -c "import torch; print(torch.__file__)" | sed 's/\/__init__.py//; s|$|/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake|')

# Fail early with a clear message if the cmake module is not where we
# expect it (e.g. a torch layout change); otherwise the sed -i calls below
# would silently do nothing or error on a bogus path.
if [[ ! -f "$torch_cuda_arch" ]]; then
  printf 'error: select_compute_arch.cmake not found at %s\n' "$torch_cuda_arch" >&2
  exit 1
fi

# Widen the arch-name regexes on these specific lines so they also accept
# the 'a' suffix (9.0a / 90a). The substitution patterns are taken verbatim
# from the known contents of torch < 2.4 and must not be altered.
sed -i '189s/\[0-9]\\\\\.\[0-9](/[0-9]\\\\.[0-9]a?(/' "$torch_cuda_arch"
sed -i '245s/\[0-9()]+\+"/[0-9()]+a?"/' "$torch_cuda_arch"
sed -i '246s/\[0-9]+\+"/[0-9]+a?"/' "$torch_cuda_arch"