Skip to content

Commit

Permalink
bugfix: fix the prefill/append attention kernel accuracy issue on sm75 (
Browse files Browse the repository at this point in the history
#448)

As reported by @esmeetu, the prefill/append attention kernel produces
incorrect results on sm75. This PR fixes the issue.

We need another round of kernel configuration checks before releasing the
official sm75 wheel (e.g., an nthrs per block of 1024 is too large for
sm75; we should use smaller values such as 512 or 256). @zhyncs, would you
mind helping with this?
  • Loading branch information
yzh119 authored Aug 16, 2024
1 parent 5f0159e commit 338b2f5
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions include/flashinfer/mma.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ __device__ __forceinline__ void mma_sync_m16n16k16_row_col_f16f16f32(float* C, u
"{%6},"
"{%7, %8, %9, %10};\n"
: "=f"(C[0]), "=f"(C[1]), "=f"(C[2]), "=f"(C[3])
: "r"(A[2]), "r"(A[3]), "r"(B[1]), "f"(0.f), "f"(0.f), "f"(0.f), "f"(0.f));
: "r"(A[2]), "r"(A[3]), "r"(B[1]), "f"(C[0]), "f"(C[1]), "f"(C[2]), "f"(C[3]));
asm volatile(
"mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 "
"{%0, %1, %2, %3},"
Expand All @@ -436,7 +436,7 @@ __device__ __forceinline__ void mma_sync_m16n16k16_row_col_f16f16f32(float* C, u
"{%6},"
"{%7, %8, %9, %10};\n"
: "=f"(C[4]), "=f"(C[5]), "=f"(C[6]), "=f"(C[7])
: "r"(A[2]), "r"(A[3]), "r"(B[3]), "f"(0.f), "f"(0.f), "f"(0.f), "f"(0.f));
: "r"(A[2]), "r"(A[3]), "r"(B[3]), "f"(C[4]), "f"(C[5]), "f"(C[6]), "f"(C[7]));
} else {
asm volatile(
"mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 "
Expand Down

0 comments on commit 338b2f5

Please sign in to comment.