Skip to content

Commit

Permalink
tuning adjustment for quantized skinny gemm. (#444)
Browse files Browse the repository at this point in the history
* tuning adjustment for quantized skinny gemm.

* lint fix
  • Loading branch information
amd-hhashemi authored Feb 28, 2025
1 parent 386763c commit fd70f59
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion csrc/rocm/custom_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ void wvSpltKQ_(void* in_a, void* in_b, void* out_c, void* scale_a,
dim3 block(64, _WvPrGrp); \
if ((K_in * N_in <= 32 * 1024) && (M_in % _YTILEs == 0)) { \
int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \
-wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 16, _UNRLs, _N> \
+wvSpltKQ_hf_sml_<64, _YTILEs, _WvPrGrp, 8, _UNRLs, _N> \
<<<grid, block, 0, stream>>>(K_in, Kp_in, M_in, af4, bf4, c, s_a, \
s_b, __wvPrGrp, Otp_in, CuCount); \
} else { \
Expand Down

0 comments on commit fd70f59

Please sign in to comment.