fix

2025-09-11 04:14:52 +00:00 · 2024-01-25 12:28:56 +00:00 · 2024-01-25 12:28:56 +00:00 · 3c93b31959
commit 3c93b31959
parent d8f33e3c2b
2 changed files with 7 additions and 4 deletions
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu
@@ -43,11 +43,11 @@ void gemm_half_q_half_cuda_part
    bool mul_r_weights
 )
 {
-    ofstream myfile;
-    myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
+    ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
    if (!b->is_gptq)
    {
        myfile << "go in is_gptq path" << "\n";
+        myfile.flush();
        dim3 blockDim, gridDim;
        blockDim.x = EXL2_BLOCK_KN_SIZE;
        blockDim.y = 1;
@@ -59,6 +59,7 @@ void gemm_half_q_half_cuda_part
        fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);

        myfile << "launch kernel" << "\n";
+        myfile.flush();
        kernel<<<gridDim, blockDim>>>
        (
            a,
@@ -119,6 +120,7 @@ void gemm_half_q_half_cuda_part
            r_weights_stride
        );
    }
+    myfile.flush();
    myfile.close();
 }

--- a/server/exllamav2_kernels/exllamav2_kernels/ext.cpp
+++ b/server/exllamav2_kernels/exllamav2_kernels/ext.cpp
@@ -109,9 +109,9 @@ void gemm_half_q_half
    bool force_cuda
 )
 {
-    ofstream myfile;
-    myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
+    ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
    myfile << "start gemm_half_q_half" << "\n";
+    myfile.flush();

    QMatrix* qm = reinterpret_cast<QMatrix*> (b);

@@ -124,6 +124,7 @@ void gemm_half_q_half
    const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
    
    myfile << "call gemm_half_q_half_cuda" << "\n";
+    myfile.flush();
    gemm_half_q_half_cuda
    (
        at::cuda::getCurrentCUDABlasHandle(),