mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
fix
This commit is contained in:
parent
d8f33e3c2b
commit
3c93b31959
@ -43,11 +43,11 @@ void gemm_half_q_half_cuda_part
|
|||||||
bool mul_r_weights
|
bool mul_r_weights
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
ofstream myfile;
|
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
|
||||||
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
|
|
||||||
if (!b->is_gptq)
|
if (!b->is_gptq)
|
||||||
{
|
{
|
||||||
myfile << "go in is_gptq path" << "\n";
|
myfile << "go in is_gptq path" << "\n";
|
||||||
|
myfile.flush();
|
||||||
dim3 blockDim, gridDim;
|
dim3 blockDim, gridDim;
|
||||||
blockDim.x = EXL2_BLOCK_KN_SIZE;
|
blockDim.x = EXL2_BLOCK_KN_SIZE;
|
||||||
blockDim.y = 1;
|
blockDim.y = 1;
|
||||||
@ -59,6 +59,7 @@ void gemm_half_q_half_cuda_part
|
|||||||
fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);
|
fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);
|
||||||
|
|
||||||
myfile << "launch kernel" << "\n";
|
myfile << "launch kernel" << "\n";
|
||||||
|
myfile.flush();
|
||||||
kernel<<<gridDim, blockDim>>>
|
kernel<<<gridDim, blockDim>>>
|
||||||
(
|
(
|
||||||
a,
|
a,
|
||||||
@ -119,6 +120,7 @@ void gemm_half_q_half_cuda_part
|
|||||||
r_weights_stride
|
r_weights_stride
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
myfile.flush();
|
||||||
myfile.close();
|
myfile.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -109,9 +109,9 @@ void gemm_half_q_half
|
|||||||
bool force_cuda
|
bool force_cuda
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
ofstream myfile;
|
ofstream myfile("/tgi/server/exllamav2_kernels/log.txt");
|
||||||
myfile.open ("/tgi/server/exllamav2_kernels/log.txt");
|
|
||||||
myfile << "start gemm_half_q_half" << "\n";
|
myfile << "start gemm_half_q_half" << "\n";
|
||||||
|
myfile.flush();
|
||||||
|
|
||||||
QMatrix* qm = reinterpret_cast<QMatrix*> (b);
|
QMatrix* qm = reinterpret_cast<QMatrix*> (b);
|
||||||
|
|
||||||
@ -124,6 +124,7 @@ void gemm_half_q_half
|
|||||||
const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
|
const at::cuda::OptionalCUDAGuard device_guard(device_of(a));
|
||||||
|
|
||||||
myfile << "call gemm_half_q_half_cuda" << "\n";
|
myfile << "call gemm_half_q_half_cuda" << "\n";
|
||||||
|
myfile.flush();
|
||||||
gemm_half_q_half_cuda
|
gemm_half_q_half_cuda
|
||||||
(
|
(
|
||||||
at::cuda::getCurrentCUDABlasHandle(),
|
at::cuda::getCurrentCUDABlasHandle(),
|
||||||
|
Loading…
Reference in New Issue
Block a user