From 4aaf72776e88eab0dad3cf5934acc1b5000a3cbd Mon Sep 17 00:00:00 2001 From: datavorous Date: Sun, 29 Mar 2026 16:13:21 +0530 Subject: [PATCH] Fix matmul_4bit gemv path for mismatched quant_state layout --- bitsandbytes/autograd/_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py index 87f41fbc9..6ed35f992 100644 --- a/bitsandbytes/autograd/_functions.py +++ b/bitsandbytes/autograd/_functions.py @@ -400,6 +400,8 @@ def matmul_4bit( f"Some matrices hidden dimension is not a multiple of {quant_state.blocksize} and efficient inference kernels are not supported for these (slow). Matrix input size found: {A.shape}", ) return MatMul4Bit.apply(A, B, out, bias, quant_state) + if A.shape[-1] != quant_state.shape[1]: + return MatMul4Bit.apply(A, B, out, bias, quant_state) else: out = F.gemv_4bit(A, B.t(), out, state=quant_state) if bias is not None: