mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 04:14:52 +00:00
fix
This commit is contained in:
parent
d8f33e3c2b
commit
3c93b31959
@ -43,11 +43,11 @@ void gemm_half_q_half_cuda_part
|
||||
bool mul_r_weights
|
||||
)
|
||||
{
|
||||
ofstream myfile;
|
||||
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
|
||||
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
|
||||
if (!b->is_gptq)
|
||||
{
|
||||
myfile << "go in is_gptq path" << "\n";
|
||||
myfile.flush();
|
||||
dim3 blockDim, gridDim;
|
||||
blockDim.x = EXL2_BLOCK_KN_SIZE;
|
||||
blockDim.y = 1;
|
||||
@ -59,6 +59,7 @@ void gemm_half_q_half_cuda_part
|
||||
fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);
|
||||
|
||||
myfile << "launch kernel" << "\n";
|
||||
myfile.flush();
|
||||
kernel<<<gridDim, blockDim>>>
|
||||
(
|
||||
a,
|
||||
@ -119,6 +120,7 @@ void gemm_half_q_half_cuda_part
|
||||
r_weights_stride
|
||||
);
|
||||
}
|
||||
myfile.flush();
|
||||
myfile.close();
|
||||
}
|
||||
|
||||
|
@ -109,9 +109,9 @@ void gemm_half_q_half
|
||||
bool force_cuda
|
||||
)
|
||||
{
|
||||
ofstream myfile;
|
||||
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
|
||||
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
|
||||
myfile << "start gemm_half_q_half" << "\n";
|
||||
myfile.flush();
|
||||
|
||||
QMatrix* qm = reinterpret_cast<QMatrix*> (b);
|
||||
|
||||
@ -124,6 +124,7 @@ void gemm_half_q_half
|
||||
const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
|
||||
|
||||
myfile << "call gemm_half_q_half_cuda" << "\n";
|
||||
myfile.flush();
|
||||
gemm_half_q_half_cuda
|
||||
(
|
||||
at::cuda::getCurrentCUDABlasHandle(),
|
||||
|
Loading…
Reference in New Issue
Block a user