This commit is contained in:
Felix Marty 2024-01-25 12:28:56 +00:00 committed by OlivierDehaene
parent d8f33e3c2b
commit 3c93b31959
2 changed files with 7 additions and 4 deletions

View File

@ -43,11 +43,11 @@ void gemm_half_q_half_cuda_part
bool mul_r_weights
)
{
ofstream myfile;
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
if (!b->is_gptq)
{
myfile << "go in is_gptq path" << "\n";
myfile.flush();
dim3 blockDim, gridDim;
blockDim.x = EXL2_BLOCK_KN_SIZE;
blockDim.y = 1;
@ -59,6 +59,7 @@ void gemm_half_q_half_cuda_part
fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);
myfile << "launch kernel" << "\n";
myfile.flush();
kernel<<<gridDim, blockDim>>>
(
a,
@ -119,6 +120,7 @@ void gemm_half_q_half_cuda_part
r_weights_stride
);
}
myfile.flush();
myfile.close();
}

View File

@ -109,9 +109,9 @@ void gemm_half_q_half
bool force_cuda
)
{
ofstream myfile;
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
myfile << "start gemm_half_q_half" << "\n";
myfile.flush();
QMatrix* qm = reinterpret_cast<QMatrix*> (b);
@ -124,6 +124,7 @@ void gemm_half_q_half
const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
myfile << "call gemm_half_q_half_cuda" << "\n";
myfile.flush();
gemm_half_q_half_cuda
(
at::cuda::getCurrentCUDABlasHandle(),