@@ -1317,8 +1317,6 @@ def perform(self, node, inputs, outputs):
13171317 z [0 ] = y
13181318
13191319 def grad (self , inputs , gout ):
1320- from pytensor .sparse .math import sp_sum
1321-
13221320 (x , s ) = inputs
13231321 (gz ,) = gout
13241322 return [col_scale (gz , s ), sp_sum (x * gz , axis = 0 )]
@@ -1368,8 +1366,6 @@ def perform(self, node, inputs, outputs):
13681366 z [0 ] = scipy .sparse .csc_matrix ((y_data , indices , indptr ), (M , N ))
13691367
13701368 def grad (self , inputs , gout ):
1371- from pytensor .sparse .math import sp_sum
1372-
13731369 (x , s ) = inputs
13741370 (gz ,) = gout
13751371 return [row_scale (gz , s ), sp_sum (x * gz , axis = 1 )]
@@ -1435,6 +1431,126 @@ def row_scale(x, s):
14351431 return col_scale (x .T , s ).T
14361432
14371433
class SpSum(Op):
    """Sum of a sparse matrix, over all elements or along one axis.

    WARNING: judgement call...
    We are not using the structured in the comparison or hashing
    because it doesn't change the perform method therefore, we
    *do* want Sums with different structured values to be merged
    by the merge optimization and this requires them to compare equal.
    """

    # ``structured`` is deliberately excluded from __props__ (see class docstring).
    __props__ = ("axis",)

    def __init__(self, axis=None, sparse_grad=True):
        super().__init__()
        self.axis = axis
        # ``structured`` only affects grad(), not perform() — see class docstring.
        self.structured = sparse_grad
        if self.axis not in (None, 0, 1):
            raise ValueError("Illegal value for self.axis.")

    def make_node(self, x):
        """Build the Apply node; output is a dense scalar (axis=None) or vector."""
        x = as_sparse_variable(x)
        assert x.format in ("csr", "csc")
        # Full reduction yields a 0-d tensor; axis reduction yields a 1-d tensor
        # of statically unknown length.
        out_shape = () if self.axis is None else (None,)
        out = TensorType(dtype=x.dtype, shape=out_shape)()
        return Apply(self, [x], [out])

    def perform(self, node, inputs, outputs):
        """Compute the sum with scipy; flatten the (1, n)/(n, 1) matrix result."""
        (sp_x,) = inputs
        (out,) = outputs
        if self.axis is None:
            out[0] = np.asarray(sp_x.sum())
        else:
            # scipy returns a dense matrix for an axis-wise sum; ravel to 1-d.
            out[0] = np.asarray(sp_x.sum(self.axis)).ravel()

    def grad(self, inputs, gout):
        """Gradient w.r.t. the sparse input.

        Structured mode broadcasts ``gz`` back over the nonzero pattern of
        ``x``; the dense mode densifies, broadcasts, then converts back to
        the input's sparse format.
        """
        (x,) = inputs
        (gz,) = gout
        # Integer/discrete inputs have a zero gradient.
        if x.dtype not in continuous_dtypes:
            return [x.zeros_like(dtype=config.floatX)]

        if self.structured:
            pattern = sp_ones_like(x)
            if self.axis is None:
                return [gz * pattern]
            if self.axis == 0:
                return [col_scale(pattern, gz)]
            if self.axis == 1:
                return [row_scale(pattern, gz)]
            raise ValueError("Illegal value for self.axis.")

        o_format = x.format
        dense_x = dense_from_sparse(x)
        grad_z = dense_from_sparse(gz) if _is_sparse_variable(gz) else gz
        if self.axis is None:
            result = ptb.second(dense_x, grad_z)
        else:
            ones = ptb.ones_like(dense_x)
            if self.axis == 0:
                result = specify_broadcastable(grad_z.dimshuffle("x", 0), 0) * ones
            elif self.axis == 1:
                result = specify_broadcastable(grad_z.dimshuffle(0, "x"), 1) * ones
            else:
                raise ValueError("Illegal value for self.axis.")
        return [SparseFromDense(o_format)(result)]

    def infer_shape(self, fgraph, node, shapes):
        """Scalar for a full sum; otherwise the length of the kept dimension."""
        (in_shape,) = shapes
        if self.axis is None:
            return [()]
        if self.axis == 0:
            return [(in_shape[1],)]
        return [(in_shape[0],)]

    def __str__(self):
        return f"{self.__class__.__name__}{{axis={self.axis}}}"
1517+
1518+
def sp_sum(x, axis=None, sparse_grad=False):
    """
    Calculate the sum of a sparse matrix along the specified axis.

    It operates a reduction along the specified axis. When `axis` is `None`,
    it is applied along all axes.

    Parameters
    ----------
    x
        Sparse matrix.
    axis
        Axis along which the sum is applied. Integer or `None`.
    sparse_grad : bool
        `True` to have a structured grad.

    Returns
    -------
    object
        The sum of `x` in a dense format.

    Notes
    -----
    The grad implementation is controlled with the `sparse_grad` parameter.
    `True` will provide a structured grad and `False` will provide a regular
    grad. For both choices, the grad returns a sparse matrix having the same
    format as `x`.

    This op does not return a sparse matrix, but a dense tensor matrix.

    """
    return SpSum(axis=axis, sparse_grad=sparse_grad)(x)
1552+
1553+
14381554class Diag (Op ):
14391555 """Extract the diagonal of a square sparse matrix as a dense vector.
14401556
@@ -1944,3 +2060,6 @@ def grad(self, inputs, grads):
19442060
19452061
19462062construct_sparse_from_list = ConstructSparseFromList ()
2063+
# NOTE(review): comment appears stale — `sp_sum` is defined in this module above,
# not imported from `pytensor.sparse.math`; confirm whether a backward-compatible
# re-export is still intended here.
0 commit comments