From 4aaf72776e88eab0dad3cf5934acc1b5000a3cbd Mon Sep 17 00:00:00 2001
From: datavorous <datavorous.work@gmail.com>
Date: Sun, 29 Mar 2026 16:13:21 +0530
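Subject: [PATCH] Fix matmul_4bit gemv path for mismatched quant_state layout

When A.shape[-1] does not equal quant_state.shape[1], skip the
F.gemv_4bit fast path and fall back to MatMul4Bit.apply.
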
---
 bitsandbytes/autograd/_functions.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index 87f41fbc9..6ed35f992 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -400,6 +400,8 @@ def matmul_4bit(
                 f"Some matrices hidden dimension is not a multiple of {quant_state.blocksize} and efficient inference kernels are not supported for these (slow). Matrix input size found: {A.shape}",
             )
             return MatMul4Bit.apply(A, B, out, bias, quant_state)
+        if A.shape[-1] != quant_state.shape[1]:
+            return MatMul4Bit.apply(A, B, out, bias, quant_state)
         else:
             out = F.gemv_4bit(A, B.t(), out, state=quant_state)
             if bias is not None: